Skip to content

Commit

Permalink
catch JSONDecodeError when loading repodata cache metadata (#13101)
Browse files Browse the repository at this point in the history
* catch JSONDecodeError when loading repodata cache metadata
  • Loading branch information
dholth committed Sep 14, 2023
1 parent 3324ec9 commit c76f5aa
Show file tree
Hide file tree
Showing 3 changed files with 75 additions and 3 deletions.
9 changes: 6 additions & 3 deletions conda/gateways/repodata/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -579,7 +579,9 @@ def load_state(self):
"""
try:
self.load(state_only=True)
except FileNotFoundError: # or JSONDecodeError?
except (FileNotFoundError, json.JSONDecodeError) as e:
if isinstance(e, json.JSONDecodeError):
warnings.warn(f"{e.__class__.__name__} loading {self.cache_path_state}")
self.state.clear()
return self.state

Expand Down Expand Up @@ -607,7 +609,7 @@ def replace(self, temp_path: Path):
adjacent to `self.cache_path_json` to be on the same filesystem.
"""
with self.cache_path_state.open("a+") as state_file, lock(state_file):
# "a+" avoids trunctating file before we have the lock and creates
# "a+" creates the file if necessary, does not truncate the file.
state_file.seek(0)
state_file.truncate()
stat = temp_path.stat()
Expand All @@ -627,8 +629,9 @@ def refresh(self, refresh_ns=0):
"""
Update access time in cache info file to indicate a HTTP 304 Not Modified response.
"""
# Note this is not thread-safe.
with self.cache_path_state.open("a+") as state_file, lock(state_file):
# "a+" avoids trunctating file before we have the lock and creates
# "a+" creates the file if necessary, does not truncate the file.
state_file.seek(0)
state_file.truncate()
self.state["refresh_ns"] = refresh_ns or time.time_ns()
Expand Down
20 changes: 20 additions & 0 deletions news/13056-jsondecodeerror
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
### Enhancements

* <news item>

### Bug fixes

* Treat `JSONDecodeError` on `repodata.info.json` as a warning, equivalent to a
missing `repodata.info.json` (#13056)

### Deprecations

* <news item>

### Docs

* <news item>

### Other

* <news item>
49 changes: 49 additions & 0 deletions tests/gateways/test_jlap.py
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,55 @@ def test_repodata_state(
assert f"_{field}" not in state


@pytest.mark.parametrize("use_jlap", [True, False])
def test_repodata_info_jsondecodeerror(
    package_server: socket,
    use_jlap: bool,
):
    """Corrupt cache-state JSON is downgraded to a warning, not an error.

    Populate the repodata cache from a local test channel, corrupt the
    on-disk ``.info.json`` state file (duplicated JSON), then reload and
    assert that a ``UserWarning`` — not a ``JSONDecodeError`` — is emitted.
    Runs against both the jlap and the classic repo interfaces.
    """
    host, port = package_server.getsockname()
    base = f"http://{host}:{port}/test"
    channel_url = f"{base}/osx-64"

    # Select the repodata interface under test; the CONDA_EXPERIMENTAL value
    # set below must agree with this choice so SubdirData picks the same class.
    if use_jlap:
        repo_cls = interface.JlapRepoInterface
    else:
        repo_cls = CondaRepoInterface

    with env_vars(
        {"CONDA_PLATFORM": "osx-64", "CONDA_EXPERIMENTAL": "jlap" if use_jlap else ""},
        stack_callback=conda_tests_ctxt_mgmt_def_pol,
    ):
        SubdirData.clear_cached_local_channel_data(
            exclude_file=False
        )  # definitely clears them, including normally-excluded file:// urls

        test_channel = Channel(channel_url)
        sd = SubdirData(channel=test_channel)

        # Confirm the parametrized environment produced the expected interface.
        assert isinstance(sd._repo, repo_cls)

        print(sd.repodata_fn)

        assert sd._loaded is False
        # should automatically fetch and load
        assert len(list(sd.iter_records()))
        assert sd._loaded is True

        # Corrupt the cache state. Double json could happen when (unadvisably)
        # running conda in parallel, before we added locks-by-default.
        sd.cache_path_state.write_text(sd.cache_path_state.read_text() * 2)

        # now try to re-download with a fresh SubdirData over the same cache
        SubdirData.clear_cached_local_channel_data(exclude_file=False)
        sd2 = SubdirData(channel=test_channel)

        # load_state() warns via
        # warnings.warn(f"{e.__class__.__name__} loading {self.cache_path_state}")
        # so " loading " must appear in the warning message.
        with pytest.warns(UserWarning, match=" loading "):
            sd2.load()


@pytest.mark.parametrize("use_jlap", ["jlap", "jlapopotamus", "jlap,another", ""])
def test_jlap_flag(use_jlap):
"""Test that CONDA_EXPERIMENTAL is a comma-delimited list."""
Expand Down

0 comments on commit c76f5aa

Please sign in to comment.