Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docker/pyproject.deps.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "mcp-plex"
version = "2.0.3"
version = "2.0.4"
requires-python = ">=3.11,<3.13"
dependencies = [
"fastmcp>=2.11.2",
Expand Down
135 changes: 4 additions & 131 deletions mcp_plex/loader/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,19 +26,16 @@
PersistenceQueue,
chunk_sequence,
)
from ..common.validation import coerce_plex_tag_id, require_positive
from ..common.validation import require_positive
from .pipeline.orchestrator import LoaderOrchestrator
from .pipeline.persistence import PersistenceStage as _PersistenceStage
from ..common.types import (
AggregatedItem,
IMDbTitle,
PlexGuid,
PlexItem,
PlexPerson,
TMDBMovie,
TMDBShow,
)
from . import qdrant as _qdrant
from . import samples as samples
from .samples import _load_from_sample as _load_from_sample

_DENSE_MODEL_PARAMS = _qdrant._DENSE_MODEL_PARAMS
_resolve_dense_model_params = _qdrant._resolve_dense_model_params
Expand Down Expand Up @@ -169,130 +166,6 @@ def _persist_imdb_retry_queue(path: Path, queue: IMDbRetryQueue) -> None:
path.write_text(json.dumps(queue.snapshot()))


def _load_from_sample(sample_dir: Path) -> list[AggregatedItem]:
    """Load items from local sample JSON files.

    Reads the bundled movie and episode fixtures under ``sample_dir`` and
    returns one :class:`AggregatedItem` per media type (movie first, then
    episode).

    Raises:
        FileNotFoundError: if any expected fixture file is missing.
        KeyError: if a fixture lacks the ``MediaContainer``/``Metadata``
            structure or a ``Guid`` entry lacks an ``id``.
    """

    results: list[AggregatedItem] = []
    movie_dir = sample_dir / "movie"
    episode_dir = sample_dir / "episode"

    # Movie sample
    with (movie_dir / "plex.json").open("r", encoding="utf-8") as f:
        # Only the first metadata entry of the sample container is used.
        movie_data = json.load(f)["MediaContainer"]["Metadata"][0]
    plex_movie = PlexItem(
        rating_key=str(movie_data.get("ratingKey", "")),
        guid=str(movie_data.get("guid", "")),
        type=movie_data.get("type", "movie"),
        title=movie_data.get("title", ""),
        summary=movie_data.get("summary"),
        year=movie_data.get("year"),
        added_at=movie_data.get("addedAt"),
        guids=[PlexGuid(id=g["id"]) for g in movie_data.get("Guid", [])],
        thumb=movie_data.get("thumb"),
        art=movie_data.get("art"),
        tagline=movie_data.get("tagline"),
        content_rating=movie_data.get("contentRating"),
        directors=[
            PlexPerson(
                id=coerce_plex_tag_id(d.get("id", 0)),
                tag=d.get("tag", ""),
                thumb=d.get("thumb"),
            )
            for d in movie_data.get("Director", [])
        ],
        writers=[
            PlexPerson(
                id=coerce_plex_tag_id(w.get("id", 0)),
                tag=w.get("tag", ""),
                thumb=w.get("thumb"),
            )
            for w in movie_data.get("Writer", [])
        ],
        actors=[
            PlexPerson(
                id=coerce_plex_tag_id(a.get("id", 0)),
                tag=a.get("tag", ""),
                role=a.get("role"),
                thumb=a.get("thumb"),
            )
            for a in movie_data.get("Role", [])
        ],
        genres=[g.get("tag", "") for g in movie_data.get("Genre", []) if g.get("tag")],
        # Plex has used both "Collection" and "Collections" keys; accept either.
        collections=[
            c.get("tag", "")
            for key in ("Collection", "Collections")
            for c in movie_data.get(key, []) or []
            if c.get("tag")
        ],
    )
    with (movie_dir / "imdb.json").open("r", encoding="utf-8") as f:
        imdb_movie = IMDbTitle.model_validate(json.load(f))
    with (movie_dir / "tmdb.json").open("r", encoding="utf-8") as f:
        tmdb_movie = TMDBMovie.model_validate(json.load(f))
    results.append(AggregatedItem(plex=plex_movie, imdb=imdb_movie, tmdb=tmdb_movie))

    # Episode sample
    with (episode_dir / "plex.tv.json").open("r", encoding="utf-8") as f:
        episode_data = json.load(f)["MediaContainer"]["Metadata"][0]
    plex_episode = PlexItem(
        rating_key=str(episode_data.get("ratingKey", "")),
        guid=str(episode_data.get("guid", "")),
        type=episode_data.get("type", "episode"),
        title=episode_data.get("title", ""),
        # Episode hierarchy: grandparent = show, parent = season.
        show_title=episode_data.get("grandparentTitle"),
        season_title=episode_data.get("parentTitle"),
        season_number=episode_data.get("parentIndex"),
        episode_number=episode_data.get("index"),
        summary=episode_data.get("summary"),
        year=episode_data.get("year"),
        added_at=episode_data.get("addedAt"),
        guids=[PlexGuid(id=g["id"]) for g in episode_data.get("Guid", [])],
        thumb=episode_data.get("thumb"),
        art=episode_data.get("art"),
        tagline=episode_data.get("tagline"),
        content_rating=episode_data.get("contentRating"),
        directors=[
            PlexPerson(
                id=coerce_plex_tag_id(d.get("id", 0)),
                tag=d.get("tag", ""),
                thumb=d.get("thumb"),
            )
            for d in episode_data.get("Director", [])
        ],
        writers=[
            PlexPerson(
                id=coerce_plex_tag_id(w.get("id", 0)),
                tag=w.get("tag", ""),
                thumb=w.get("thumb"),
            )
            for w in episode_data.get("Writer", [])
        ],
        actors=[
            PlexPerson(
                id=coerce_plex_tag_id(a.get("id", 0)),
                tag=a.get("tag", ""),
                role=a.get("role"),
                thumb=a.get("thumb"),
            )
            for a in episode_data.get("Role", [])
        ],
        genres=[g.get("tag", "") for g in episode_data.get("Genre", []) if g.get("tag")],
        collections=[
            c.get("tag", "")
            for key in ("Collection", "Collections")
            for c in episode_data.get(key, []) or []
            if c.get("tag")
        ],
    )
    with (episode_dir / "imdb.tv.json").open("r", encoding="utf-8") as f:
        imdb_episode = IMDbTitle.model_validate(json.load(f))
    with (episode_dir / "tmdb.tv.json").open("r", encoding="utf-8") as f:
        tmdb_show = TMDBShow.model_validate(json.load(f))
    results.append(AggregatedItem(plex=plex_episode, imdb=imdb_episode, tmdb=tmdb_show))

    return results


def _build_loader_orchestrator(
*,
client: AsyncQdrantClient,
Expand Down Expand Up @@ -501,7 +374,7 @@ async def run(
items: list[AggregatedItem]
if sample_dir is not None:
logger.info("Loading sample data from %s", sample_dir)
sample_items = _load_from_sample(sample_dir)
sample_items = samples._load_from_sample(sample_dir)
orchestrator, items, qdrant_retry_queue = _build_loader_orchestrator(
client=client,
collection_name=collection_name,
Expand Down
148 changes: 148 additions & 0 deletions mcp_plex/loader/samples.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
"""Helpers for working with built-in sample data files."""
from __future__ import annotations

import json
from pathlib import Path
from typing import Any, Iterable

from ..common.types import (
AggregatedItem,
IMDbTitle,
PlexGuid,
PlexItem,
PlexPerson,
TMDBMovie,
TMDBShow,
)
from ..common.validation import coerce_plex_tag_id


def _read_json(path: Path) -> Any:
"""Return parsed JSON content from ``path``."""

with path.open("r", encoding="utf-8") as handle:
return json.load(handle)


def _load_people(
    entries: Iterable[dict[str, Any]] | None,
    *,
    include_role: bool,
) -> list[PlexPerson]:
    """Construct :class:`PlexPerson` objects from Plex JSON entries."""

    people: list[PlexPerson] = []
    for raw in entries or []:
        # ``role`` only applies to actor credits; director/writer entries
        # are built without it.
        extras: dict[str, Any] = {"role": raw.get("role")} if include_role else {}
        people.append(
            PlexPerson(
                id=coerce_plex_tag_id(raw.get("id", 0)),
                tag=raw.get("tag", ""),
                thumb=raw.get("thumb"),
                **extras,
            )
        )
    return people


def _load_collections(data: dict[str, Any]) -> list[str]:
"""Extract collection tags from Plex metadata."""

collections: list[str] = []
for key in ("Collection", "Collections"):
entries = data.get(key) or []
for entry in entries:
tag = entry.get("tag")
if tag:
collections.append(tag)
return collections


def _load_plex_movie(data: dict[str, Any]) -> PlexItem:
    """Build a :class:`PlexItem` for the sample movie."""

    # Assemble keyword arguments first so the construction reads as a flat
    # mapping from Plex JSON keys to PlexItem fields.
    fields: dict[str, Any] = {
        "rating_key": str(data.get("ratingKey", "")),
        "guid": str(data.get("guid", "")),
        "type": data.get("type", "movie"),
        "title": data.get("title", ""),
        "summary": data.get("summary"),
        "year": data.get("year"),
        "added_at": data.get("addedAt"),
        "guids": [
            PlexGuid(id=str(entry.get("id", "")))
            for entry in (data.get("Guid") or [])
        ],
        "thumb": data.get("thumb"),
        "art": data.get("art"),
        "tagline": data.get("tagline"),
        "content_rating": data.get("contentRating"),
        "directors": _load_people(data.get("Director"), include_role=False),
        "writers": _load_people(data.get("Writer"), include_role=False),
        "actors": _load_people(data.get("Role"), include_role=True),
        "genres": [
            tag
            for entry in (data.get("Genre") or [])
            if (tag := entry.get("tag"))
        ],
        "collections": _load_collections(data),
    }
    return PlexItem(**fields)


def _load_plex_episode(data: dict[str, Any]) -> PlexItem:
    """Build a :class:`PlexItem` for the sample episode."""

    # Assemble keyword arguments first; episode entries carry extra
    # hierarchy fields (grandparent = show, parent = season).
    fields: dict[str, Any] = {
        "rating_key": str(data.get("ratingKey", "")),
        "guid": str(data.get("guid", "")),
        "type": data.get("type", "episode"),
        "title": data.get("title", ""),
        "show_title": data.get("grandparentTitle"),
        "season_title": data.get("parentTitle"),
        "season_number": data.get("parentIndex"),
        "episode_number": data.get("index"),
        "summary": data.get("summary"),
        "year": data.get("year"),
        "added_at": data.get("addedAt"),
        "guids": [
            PlexGuid(id=str(entry.get("id", "")))
            for entry in (data.get("Guid") or [])
        ],
        "thumb": data.get("thumb"),
        "art": data.get("art"),
        "tagline": data.get("tagline"),
        "content_rating": data.get("contentRating"),
        "directors": _load_people(data.get("Director"), include_role=False),
        "writers": _load_people(data.get("Writer"), include_role=False),
        "actors": _load_people(data.get("Role"), include_role=True),
        "genres": [
            tag
            for entry in (data.get("Genre") or [])
            if (tag := entry.get("tag"))
        ],
        "collections": _load_collections(data),
    }
    return PlexItem(**fields)


def _load_from_sample(sample_dir: Path) -> list[AggregatedItem]:
    """Load items from local sample JSON files.

    Returns one :class:`AggregatedItem` for the movie fixture and one for
    the episode fixture, in that order.
    """

    def _metadata(path: Path) -> dict[str, Any]:
        # Fixtures wrap a single item in the standard Plex response envelope.
        return _read_json(path)["MediaContainer"]["Metadata"][0]

    movie_dir = sample_dir / "movie"
    movie_item = AggregatedItem(
        plex=_load_plex_movie(_metadata(movie_dir / "plex.json")),
        imdb=IMDbTitle.model_validate(_read_json(movie_dir / "imdb.json")),
        tmdb=TMDBMovie.model_validate(_read_json(movie_dir / "tmdb.json")),
    )

    episode_dir = sample_dir / "episode"
    episode_item = AggregatedItem(
        plex=_load_plex_episode(_metadata(episode_dir / "plex.tv.json")),
        imdb=IMDbTitle.model_validate(_read_json(episode_dir / "imdb.tv.json")),
        tmdb=TMDBShow.model_validate(_read_json(episode_dir / "tmdb.tv.json")),
    )

    return [movie_item, episode_item]


__all__ = ["_load_from_sample"]
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "mcp-plex"
version = "2.0.3"
version = "2.0.4"

description = "Plex-Oriented Model Context Protocol Server"
requires-python = ">=3.11,<3.13"
Expand Down
9 changes: 6 additions & 3 deletions tests/test_loader_logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from click.testing import CliRunner

from mcp_plex import loader
from mcp_plex.loader import samples as loader_samples
from mcp_plex.loader import cli as loader_cli
from qdrant_client import models

Expand Down Expand Up @@ -47,7 +48,8 @@ def test_run_logs_upsert(monkeypatch, caplog):

def test_run_logs_no_points(monkeypatch, caplog):
monkeypatch.setattr(loader, "AsyncQdrantClient", DummyClient)
monkeypatch.setattr(loader, "_load_from_sample", lambda _: [])
monkeypatch.setattr(loader_samples, "_load_from_sample", lambda _: [])
monkeypatch.setattr(loader, "_load_from_sample", loader_samples._load_from_sample)
sample_dir = Path(__file__).resolve().parents[1] / "sample-data"
with caplog.at_level(logging.INFO):
asyncio.run(loader.run(None, None, None, sample_dir, None, None))
Expand Down Expand Up @@ -83,13 +85,14 @@ def test_run_limits_concurrent_upserts(monkeypatch):
started = asyncio.Queue()
release_queue = asyncio.Queue()
third_requested = asyncio.Event()
base_items = list(loader._load_from_sample(sample_dir))
base_items = list(loader_samples._load_from_sample(sample_dir))

monkeypatch.setattr(
loader,
loader_samples,
"_load_from_sample",
lambda _: base_items + base_items[:1],
)
monkeypatch.setattr(loader, "_load_from_sample", loader_samples._load_from_sample)

upsert_calls = {"count": 0}

Expand Down
6 changes: 3 additions & 3 deletions tests/test_loader_unit.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
QdrantRuntimeConfig,
_build_loader_orchestrator,
_fetch_imdb,
_load_from_sample,
_load_imdb_retry_queue,
_persist_imdb_retry_queue,
_process_imdb_retry_queue,
Expand All @@ -30,6 +29,7 @@
build_point,
)
from mcp_plex.loader.pipeline.channels import IMDbRetryQueue
from mcp_plex.loader import samples as loader_samples
from mcp_plex.common.types import (
AggregatedItem,
IMDbName,
Expand Down Expand Up @@ -83,7 +83,7 @@ def fake_import(name, globals=None, locals=None, fromlist=(), level=0):
assert not hasattr(module, "PartialPlexObject")
def test_load_from_sample_returns_items():
sample_dir = Path(__file__).resolve().parents[1] / "sample-data"
items = _load_from_sample(sample_dir)
items = loader_samples._load_from_sample(sample_dir)
assert len(items) == 2
assert {i.plex.type for i in items} == {"movie", "episode"}

Expand Down Expand Up @@ -434,7 +434,7 @@ def test_build_point_includes_metadata():


def test_loader_pipeline_processes_sample_batches(monkeypatch):
sample_items = _load_from_sample(
sample_items = loader_samples._load_from_sample(
Path(__file__).resolve().parents[1] / "sample-data"
)

Expand Down
Loading