Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
125 changes: 125 additions & 0 deletions python/PiFinder/catalog_cache.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
"""Pickle-based cache for the output of CatalogBuilder._build_composite.

Cache layout under ~/PiFinder_data/cache/catalogs/:
composite_objects.pkl — pickled {composite_objects, catalogs_info}
composite_objects.meta.json — fingerprint for invalidation

The `logged` flag on each CompositeObject is user state; it is reset to False
before pickling and re-applied from the observations DB after load.
"""
from __future__ import annotations

import json
import logging
import pickle
import sys
from typing import Dict, List, Optional, Tuple

from PiFinder.composite_object import CompositeObject
from PiFinder.utils import data_dir, pifinder_db

logger = logging.getLogger("Catalog.Cache")

# Bump when CompositeObject shape, _create_full_composite_object output, or
# the pickled payload structure changes.
CACHE_VERSION = 1

CACHE_DIR = data_dir / "cache" / "catalogs"
PICKLE_PATH = CACHE_DIR / "composite_objects.pkl"
META_PATH = CACHE_DIR / "composite_objects.meta.json"


def _fingerprint() -> Dict:
st = pifinder_db.stat()
return {
"cache_version": CACHE_VERSION,
"db_path": str(pifinder_db.resolve()),
"db_mtime_ns": st.st_mtime_ns,
"db_size": st.st_size,
"python_version": f"{sys.version_info.major}.{sys.version_info.minor}",
"pickle_protocol": pickle.HIGHEST_PROTOCOL,
}


def load() -> Optional[Tuple[List[CompositeObject], Dict[str, Dict]]]:
"""Return (composite_objects, catalogs_info) if cache is valid, else None.

Returns None on any failure (missing files, stale fingerprint, corrupt pickle).
Resets `logged=False` on returned objects — caller must re-apply from obs_db.
"""
if not PICKLE_PATH.exists() or not META_PATH.exists():
return None
try:
with META_PATH.open() as f:
stored_meta = json.load(f)
except Exception as e:
logger.warning("Cache meta unreadable, ignoring cache: %s", e)
return None

current_meta = _fingerprint()
if stored_meta != current_meta:
logger.info(
"Catalog cache fingerprint mismatch; will rebuild. stored=%s current=%s",
stored_meta,
current_meta,
)
return None

try:
with PICKLE_PATH.open("rb") as f:
data = pickle.load(f)
composite_objects = data["composite_objects"]
catalogs_info = data["catalogs_info"]
except Exception as e:
logger.warning("Cache pickle unreadable, ignoring cache: %s", e)
return None

for obj in composite_objects:
obj.logged = False

return composite_objects, catalogs_info


def save(
composite_objects: List[CompositeObject], catalogs_info: Dict[str, Dict]
) -> None:
"""Write the cache. Never raises — logs errors instead.

Strips `logged` to False so the cache is stable across sessions.
Writes the pickle atomically via tmp + rename to avoid torn writes.
"""
try:
CACHE_DIR.mkdir(parents=True, exist_ok=True)

for obj in composite_objects:
obj.logged = False

payload = {
"composite_objects": composite_objects,
"catalogs_info": catalogs_info,
}

tmp_pkl = PICKLE_PATH.with_suffix(".pkl.tmp")
with tmp_pkl.open("wb") as f:
pickle.dump(payload, f, protocol=pickle.HIGHEST_PROTOCOL)
tmp_pkl.replace(PICKLE_PATH)

with META_PATH.open("w") as f:
json.dump(_fingerprint(), f, indent=2)

logger.info(
"Catalog cache written: %d composite objects -> %s",
len(composite_objects),
PICKLE_PATH,
)
except Exception as e:
logger.error("Failed to write catalog cache: %s", e, exc_info=True)


def clear() -> None:
"""Remove cache files. Used by tests and for manual invalidation."""
for p in (PICKLE_PATH, META_PATH):
try:
p.unlink()
except FileNotFoundError:
pass
87 changes: 66 additions & 21 deletions python/PiFinder/catalogs.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
TimerMixin,
VirtualIDManager,
)
from PiFinder import catalog_cache

logger = logging.getLogger("Catalog")

Expand Down Expand Up @@ -859,34 +860,70 @@ def build(self, shared_state, ui_queue=None) -> Catalogs:
shared_state: Shared state object
ui_queue: Optional queue to signal completion (for main loop integration)
"""
db: Database = ObjectsDatabase()
obs_db: Database = ObservationsDatabase()

# list of dicts, one dict for each entry in the catalog_objects table
catalog_objects: List[Dict] = [dict(row) for row in db.get_catalog_objects()]
objects = db.get_objects()
common_names = Names()
catalogs_info = db.get_catalogs_dict()
objects = {row["id"]: dict(row) for row in objects}
cached = catalog_cache.load()
if cached is not None:
composite_objects, catalogs_info = cached
obs_db.load_observed_objects_cache()
for obj in composite_objects:
obj.logged = obs_db.check_logged(obj)

composite_objects: List[CompositeObject] = self._build_composite(
catalog_objects, objects, common_names, obs_db, ui_queue
)
self.catalog_dicts = {}
logger.info(
"Loaded %i objects from catalog cache", len(composite_objects)
)

all_catalogs: Catalogs = self._get_catalogs(
composite_objects, catalogs_info
)

# All objects loaded synchronously from cache — no background
# loader, no completion signal (there is nothing for the UI to
# transition from).
self._background_loader = None
self._pending_catalogs_ref = all_catalogs
else:
db: Database = ObjectsDatabase()

# list of dicts, one dict for each entry in the catalog_objects table
catalog_objects: List[Dict] = [
dict(row) for row in db.get_catalog_objects()
]
objects = db.get_objects()
common_names = Names()
catalogs_info = db.get_catalogs_dict()
objects = {row["id"]: dict(row) for row in objects}

composite_objects = self._build_composite(
catalog_objects, objects, common_names, obs_db, ui_queue
)

# Cache write context for _on_loader_complete
self._cache_priority_objects = list(composite_objects)
self._cache_catalogs_info = catalogs_info

# This is used for caching catalog dicts
# to speed up repeated searches
self.catalog_dicts = {}
logger.debug("Loaded %i objects from database", len(composite_objects))
# This is used for caching catalog dicts
# to speed up repeated searches
self.catalog_dicts = {}
logger.debug("Loaded %i objects from database", len(composite_objects))

all_catalogs: Catalogs = self._get_catalogs(composite_objects, catalogs_info)
all_catalogs = self._get_catalogs(composite_objects, catalogs_info)

# Store catalogs reference for background loader completion
self._pending_catalogs_ref = all_catalogs
# Store catalogs reference for background loader completion
self._pending_catalogs_ref = all_catalogs

# Pass background loader reference to Catalogs instance so it can check loading status
# This is set in _build_composite() if there are deferred objects
if hasattr(self, "_background_loader") and self._background_loader is not None:
all_catalogs._background_loader = self._background_loader
# Pass background loader reference to Catalogs instance so it can check loading status
# This is set in _build_composite() if there are deferred objects
if (
hasattr(self, "_background_loader")
and self._background_loader is not None
):
all_catalogs._background_loader = self._background_loader
else:
# No deferred objects — write cache immediately since
# _on_loader_complete will never fire.
catalog_cache.save(list(composite_objects), catalogs_info)
# Initialize planet catalog with whatever date we have for now
# This will be re-initialized on activation of Catalog ui module
# if we have GPS lock
Expand Down Expand Up @@ -1046,6 +1083,14 @@ def _on_loader_complete(
if catalog.catalog_filter:
catalog.filter_objects()

# Persist the full composite list (priority + deferred) for next startup.
priority_objects = getattr(self, "_cache_priority_objects", []) or []
catalogs_info_for_cache = getattr(self, "_cache_catalogs_info", None)
if catalogs_info_for_cache is not None:
catalog_cache.save(
priority_objects + list(loaded_objects), catalogs_info_for_cache
)

# Signal main loop that catalogs are fully loaded
if ui_queue:
try:
Expand Down
7 changes: 3 additions & 4 deletions python/PiFinder/db/observations_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,8 +122,7 @@ def log_object(self, session_uuid, obs_time, catalog, sequence, solution, notes)
self.conn.commit()

# Update cache so filters reflect the new observation immediately
if (catalog, sequence) not in self.observed_objects_cache:
self.observed_objects_cache.append((catalog, sequence))
self.observed_objects_cache.add((catalog, sequence))

observation_id = self.cursor.execute(
"select last_insert_rowid() as id"
Expand All @@ -146,9 +145,9 @@ def load_observed_objects_cache(self) -> None:
"""
(re)Loads the logged object cache
"""
self.observed_objects_cache: list[tuple[str, int]] = [
self.observed_objects_cache: set[tuple[str, int]] = {
(x["catalog"], x["sequence"]) for x in self.get_observed_objects()
]
}

def check_logged(self, obj_record: CompositeObject):
"""
Expand Down
128 changes: 128 additions & 0 deletions python/tests/test_catalog_cache.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
"""Tests for PiFinder.catalog_cache."""
import os
import pickle
import pytest

from PiFinder import catalog_cache
from PiFinder.composite_object import CompositeObject, MagnitudeObject


def _make_obj(seq: int, catalog_code: str = "NGC", logged: bool = False):
obj = CompositeObject(
id=seq,
object_id=seq,
ra=1.0 * seq,
dec=-1.0 * seq,
catalog_code=catalog_code,
sequence=seq,
description=f"obj {seq}",
mag=MagnitudeObject([6.5]),
logged=logged,
)
return obj


@pytest.fixture
def cache_paths(tmp_path, monkeypatch):
"""Redirect cache files into tmp_path and provide a fake source DB to fingerprint."""
fake_db = tmp_path / "pifinder_objects.db"
fake_db.write_bytes(b"\x00" * 128)

pkl = tmp_path / "composite_objects.pkl"
meta = tmp_path / "composite_objects.meta.json"

monkeypatch.setattr(catalog_cache, "CACHE_DIR", tmp_path)
monkeypatch.setattr(catalog_cache, "PICKLE_PATH", pkl)
monkeypatch.setattr(catalog_cache, "META_PATH", meta)
monkeypatch.setattr(catalog_cache, "pifinder_db", fake_db)

return {"db": fake_db, "pkl": pkl, "meta": meta, "dir": tmp_path}


@pytest.mark.unit
def test_load_returns_none_when_no_cache(cache_paths):
assert catalog_cache.load() is None


@pytest.mark.unit
def test_roundtrip_preserves_objects_and_info(cache_paths):
objs = [_make_obj(i) for i in range(5)]
info = {"NGC": {"desc": "ngc", "max_sequence": 100}}

catalog_cache.save(objs, info)
loaded = catalog_cache.load()

assert loaded is not None
out_objs, out_info = loaded
assert len(out_objs) == 5
assert out_info == info
assert [o.sequence for o in out_objs] == [0, 1, 2, 3, 4]
assert [o.catalog_code for o in out_objs] == ["NGC"] * 5


@pytest.mark.unit
def test_logged_is_not_persisted(cache_paths):
"""logged=True must be reset on save so user state doesn't leak across sessions."""
objs = [_make_obj(i, logged=True) for i in range(3)]
catalog_cache.save(objs, {})

with cache_paths["pkl"].open("rb") as f:
payload = pickle.load(f)

assert all(o.logged is False for o in payload["composite_objects"])

# And load() also returns logged=False
loaded = catalog_cache.load()
assert loaded is not None
out_objs, _ = loaded
assert all(o.logged is False for o in out_objs)


@pytest.mark.unit
def test_fingerprint_mismatch_invalidates(cache_paths):
"""If the source DB changes (mtime or size), the cache must be rejected."""
objs = [_make_obj(0)]
catalog_cache.save(objs, {})
assert catalog_cache.load() is not None # sanity

# Bump the DB mtime and rewrite it to a new size — the cache should now be stale.
cache_paths["db"].write_bytes(b"\x01" * 256)
# Touch mtime to be safe even on fast filesystems.
new_time = cache_paths["db"].stat().st_mtime + 10
os.utime(cache_paths["db"], (new_time, new_time))

assert catalog_cache.load() is None


@pytest.mark.unit
def test_corrupt_pickle_returns_none(cache_paths):
catalog_cache.save([_make_obj(0)], {})
# Sanity check first.
assert catalog_cache.load() is not None

# Corrupt the pickle without touching the meta file.
cache_paths["pkl"].write_bytes(b"not a pickle")

assert catalog_cache.load() is None


@pytest.mark.unit
def test_corrupt_meta_returns_none(cache_paths):
catalog_cache.save([_make_obj(0)], {})
cache_paths["meta"].write_text("not json{{")

assert catalog_cache.load() is None


@pytest.mark.unit
def test_clear_removes_files(cache_paths):
catalog_cache.save([_make_obj(0)], {})
assert cache_paths["pkl"].exists() and cache_paths["meta"].exists()

catalog_cache.clear()

assert not cache_paths["pkl"].exists()
assert not cache_paths["meta"].exists()

# Calling clear when files are already gone must not raise.
catalog_cache.clear()
Loading