common-workflow-language · mr-c · Sep 20, 2025 · Sep 20, 2025 · Sep 20, 2025 · Sep 20, 2025
diff --git a/Makefile b/Makefile
@@ -29,7 +29,7 @@ EXTRAS=
 PYSOURCES=$(wildcard ${MODULE}/**.py) setup.py
 DEVPKGS=diff_cover black pylint pep257 pydocstyle flake8 tox tox-pyenv \
 	isort wheel autoflake flake8-bugbear pyupgrade bandit \
-	-rtest-requirements.txt -rmypy-requirements.txt
+	-rtest-requirements.txt -rmypy-requirements.txt auto-walrus
 COVBASE=coverage run --append
 
 # Updating the Major & Minor version below?
@@ -183,7 +183,8 @@ mypy: $(filter-out setup.py,$(PYSOURCES))
 	MYPYPATH=$$MYPYPATH:mypy-stubs mypy $^
 
 pyupgrade: $(PYSOURCES)
-	pyupgrade --exit-zero-even-if-changed --py37-plus $^
+	pyupgrade --exit-zero-even-if-changed --py39-plus $^
+	auto-walrus $^
 
 release-test: FORCE
 	git diff-index --quiet HEAD -- || ( echo You have uncommitted changes, please commit them and try again; false )

diff --git a/cwlprov/prov.py b/cwlprov/prov.py
@@ -24,12 +24,12 @@
 
 import datetime
 import logging
+from collections.abc import Iterable
 from pathlib import Path
 from typing import (
     TYPE_CHECKING,
     Any,
     Callable,
-    Iterable,
     List,
     Optional,
     Set,
@@ -159,7 +159,7 @@ def _prov_format(self, media_type: Optional[str]) -> Optional[Path]:
                 return self.ro.resolve_path(str(prov))
         return None
 
-    def _load_prov_document(self) -> Union[Tuple[None, None], Tuple[ProvBundle, Path]]:
+    def _load_prov_document(self) -> Union[tuple[None, None], tuple[ProvBundle, Path]]:
         # Preferred order
         candidates = ("xml", "json", "nt", "ttl", "rdf")
         # Note: Not all of these parse consistently with rdflib in py3
@@ -221,7 +221,7 @@ def label(self) -> Optional[QualifiedName]:
     def type(self) -> Optional[QualifiedName]:
         return self._prov_attr("prov:type")
 
-    def types(self) -> Set[QualifiedName]:
+    def types(self) -> set[QualifiedName]:
         return set(self._prov_attrs("prov:type"))
 
     @property
@@ -238,7 +238,7 @@ def __str__(self) -> str:
     def _prov_attr(self, attr: QualifiedNameCandidate) -> Optional[QualifiedName]:
         return first(self._prov_attrs(attr))
 
-    def _prov_attrs(self, attr: QualifiedNameCandidate) -> Set[QualifiedName]:
+    def _prov_attrs(self, attr: QualifiedNameCandidate) -> set[QualifiedName]:
         return self.record.get_attribute(attr)
 
 
@@ -382,7 +382,8 @@ def nameext(self) -> Any:
     def derivations(self) -> Iterable["Derivation"]:
         return self._records(ProvDerivation, Derivation, PROV_ATTR_USED_ENTITY)
 
-    def secondary_files(self) -> List[Any]:
+    def secondary_files(self) -> list[Any]:
+        """Return any SecondaryFiles for this Entity."""
         return [
             d.generated_entity()
             for d in self.derivations()

diff --git a/cwlprov/ro.py b/cwlprov/ro.py
@@ -27,9 +27,10 @@
 import logging
 import pathlib
 import urllib.parse
+from collections.abc import Iterable
 from contextlib import ExitStack
 from functools import partial
-from typing import TYPE_CHECKING, Iterable, Optional, Set, Union
+from typing import TYPE_CHECKING, Optional, Set, Union
 
 import arcp
 from bdbag.bdbagit import BDBag
@@ -181,18 +182,21 @@ def _uriref(
             return self.id_uriref
 
     @property
-    def conformsTo(self) -> Set[str]:
+    def conformsTo(self) -> set[str]:
+        """Find which things this RO conforms to."""
         resource = self._uriref()
         return set(map(str, self.manifest.objects(resource, DCTERMS.conformsTo)))
 
     @property
-    def createdBy(self) -> Set["Agent"]:
+    def createdBy(self) -> set["Agent"]:
+        """Find the set of Agents who created this RO."""
         resource = self._uriref()
         new_agent = partial(Agent, self.manifest)
         return set(map(new_agent, self.manifest.objects(resource, PAV.createdBy)))
 
     @property
-    def authoredBy(self) -> Set["Agent"]:
+    def authoredBy(self) -> set["Agent"]:
+        """Find the set of Agents who authored this RO."""
         resource = self._uriref()
         new_agent = partial(Agent, self.manifest)
         return set(map(new_agent, self.manifest.objects(resource, PAV.authoredBy)))
@@ -201,7 +205,7 @@ def annotations_about(
         self,
         path: Optional[Union[str, pathlib.PurePosixPath]] = None,
         uri: Optional[str] = None,
-    ) -> Set["Annotation"]:
+    ) -> set["Annotation"]:
         resource = self._uriref(path=path, uri=uri)
         new_annotation = partial(Annotation, self.manifest)
         return set(map(new_annotation, self.manifest.subjects(OA.hasTarget, resource)))
@@ -210,7 +214,7 @@ def annotations_with_content(
         self,
         path: Optional[Union[str, pathlib.PurePosixPath]] = None,
         uri: Optional[Union[str, Identifier]] = None,
-    ) -> Set["Annotation"]:
+    ) -> set["Annotation"]:
         resource = self._uriref(path=path, uri=uri)
         new_annotation = partial(Annotation, self.manifest)
         return set(map(new_annotation, self.manifest.subjects(OA.hasBody, resource)))
@@ -233,7 +237,7 @@ def provenance(
         self,
         path: Optional[Union[str, pathlib.PurePosixPath]] = None,
         uri: Optional[str] = None,
-    ) -> Optional[Set[Node]]:
+    ) -> Optional[set[Node]]:
         for a in self.annotations_about(path, uri):
             if a.motivatedBy == PROV.has_provenance:
                 return a.hasBodies
@@ -288,7 +292,7 @@ def hasBody(self) -> Optional[Node]:
         return next(self._graph.objects(self._id, OA.hasBody), None)
 
     @property
-    def hasBodies(self) -> Set[Node]:
+    def hasBodies(self) -> set[Node]:
         """Find the set of body Nodes of this Annotation."""
         return set(self._graph.objects(self._id, OA.hasBody))
 
@@ -298,7 +302,7 @@ def hasTarget(self) -> Optional[Identifier]:
         return next(self._graph.objects(self._id, OA.hasTarget), None)
 
     @property
-    def hasTargets(self) -> Set[Node]:
+    def hasTargets(self) -> set[Node]:
-        """Find which Noes this Annotation targets."""
+        """Find which Nodes this Annotation targets."""
         return set(self._graph.objects(self._id, OA.hasTarget))
 
@@ -343,10 +347,8 @@ def __repr__(self) -> str:
     def __str__(self) -> str:
         """Print the name, identifier, and uri of this Agent."""
         s = str(self.name) or "(unknown)"
-        o = self.orcid
-        if o:
+        if o := self.orcid:
             s += " <%s>" % o
-        u = self.uri
-        if u:
+        if u := self.uri:
             s += " <%s>" % u
         return s
diff --git a/cwlprov/tool.py b/cwlprov/tool.py
@@ -35,6 +35,7 @@
 import sys
 import tempfile
 import urllib.parse
+from collections.abc import Iterable, MutableMapping, MutableSequence
 from enum import IntEnum
 from functools import partial
 from pathlib import Path
@@ -44,10 +45,7 @@
     Callable,
     ContextManager,
     Dict,
-    Iterable,
     List,
-    MutableMapping,
-    MutableSequence,
     Optional,
     Set,
     TextIO,
@@ -131,7 +129,8 @@ class Status(IntEnum):
     MISSING_MANIFEST = 171
 
 
-def parse_args(args: Optional[List[str]] = None) -> argparse.Namespace:
+def parse_args(args: Optional[list[str]] = None) -> argparse.Namespace:
+    """Parse the command line arguments."""
     parser = argparse.ArgumentParser(
         description="cwlprov explores Research Objects containing provenance of "
         "Common Workflow Language executions. <https://w3id.org/cwl/prov/>"
@@ -400,8 +399,8 @@ def _find_bagit_folder(folder: Optional[str] = None) -> Optional[pathlib.Path]:
         pfolder = pfolder.parent
 
 
-def _info_set(bag: BDBag, key: str) -> Set[Any]:
-    v: Union[str, List[Any]] = bag.info.get(key, [])
+def _info_set(bag: BDBag, key: str) -> set[Any]:
+    v: Union[str, list[Any]] = bag.info.get(key, [])
     if isinstance(v, list):
         return set(v)
     else:
@@ -412,7 +411,7 @@ def _simpler_uuid(uri: Any) -> str:
     return str(uri).replace("urn:uuid:", "")
 
 
-def _as_uuid(w: str) -> Tuple[str, Optional[UUID], str]:
+def _as_uuid(w: str) -> tuple[str, Optional[UUID], str]:
     try:
         uuid = UUID(w.replace("urn:uuid:", ""))
         return (uuid.urn, uuid, str(uuid))
@@ -487,8 +486,10 @@ def _set_log_level(quiet: Optional[bool] = None, verbose: int = 0) -> None:
 
 
 class Tool(ContextManager["Tool"]):
-    def __init__(self, args: Optional[List[str]] = None) -> None:
-        """Create a Tool and open the output stream."""
+    """The cwlprov-py tool."""
+
+    def __init__(self, args: Optional[list[str]] = None) -> None:
+        """Open the output stream."""
         self.args = parse_args(args)
         if self.args.output != "-":
             self.output: Optional[TextIO] = open(
@@ -509,7 +510,7 @@ def __enter__(self) -> "Tool":
 
     def __exit__(
         self,
-        exc_type: Optional[Type[BaseException]],
+        exc_type: Optional[type[BaseException]],
         exc_value: Optional[BaseException],
         traceback: Optional[TracebackType],
     ) -> Optional[bool]:
@@ -666,7 +667,7 @@ def main(self) -> int:
             return Status.OK
 
         # Else, find the other commands
-        COMMANDS: Dict[str, Callable[[], int]] = {
+        COMMANDS: dict[str, Callable[[], int]] = {
             "info": self.info,
             "who": self.who,
             "prov": self.prov,
@@ -702,7 +703,7 @@ def _absolute_or_relative_path(self, path: Any, absolute: bool = False) -> Path:
         else:
             return Path(p).absolute()
 
-    def _wf_id(self, run: Optional[str] = None) -> Tuple[str, Optional[UUID], str]:
+    def _wf_id(self, run: Optional[str] = None) -> tuple[str, Optional[UUID], str]:
         w = run or self.args.id or self.ro.workflow_id
         # ensure consistent UUID URIs
         return _as_uuid(str(w))
@@ -775,11 +776,9 @@ def info(self) -> int:
         }
         if cwlprov:
             self.print("Profile: %s" % many(cwlprov))
-        w = ro.workflow_id
-        if w:
+        if w := ro.workflow_id:
             self.print("Workflow run ID: %s" % w)
-        when = ro.bag.info.get("Bagging-Date")
-        if when:
+        if when := ro.bag.info.get("Bagging-Date"):
             self.print("Packaged: %s" % when)
         return Status.OK
 
@@ -923,15 +922,14 @@ def _load_provenance(self, wf_uri: str) -> int:
 
     def _load_activity_from_provenance(
         self,
-    ) -> Union[Tuple[Literal[Status.OK], Activity], Tuple[int, None]]:
+    ) -> Union[tuple[Literal[Status.OK], Activity], tuple[int, None]]:
         wf_uri, wf_uuid, wf_name = self._wf_id(self.args.run)
         a_uri, a_uuid, a_name = self._wf_id()
         error = self._load_provenance(wf_uri)
         if error != Status.OK:
             return (error, None)
 
-        activity = self.provenance.activity(a_uri)
-        if activity:
+        if activity := self.provenance.activity(a_uri):
             return (Status.OK, activity)
         else:
             _logger.error("Provenance does not describe step %s: %s", wf_name, a_uri)
@@ -986,7 +984,7 @@ def _inputs_or_outputs(self, is_inputs: bool) -> int:
         else:
             _logger.info("%ss for workflow %s", put_s, wf_name)
 
-        job: Dict[str, Dict[str, str]] = {}
+        job: dict[str, dict[str, str]] = {}
 
         if is_inputs:
             records: Iterable[Union[Generation, Usage]] = activity.usage()
@@ -1070,7 +1068,7 @@ def _entity_as_json(self, entity: Entity, absolute: bool = True) -> Any:
                 continue
             _logger.debug("entity %s bundledAs %s", file_candidate.uri, bundled)
             bundled_path = self._resource_path(bundled, absolute=absolute)
-            json: Dict[str, Any] = {
+            json: dict[str, Any] = {
                 "class": "File",
                 "path": str(bundled_path),
             }
@@ -1106,8 +1104,8 @@ def _entity_as_json(self, entity: Entity, absolute: bool = True) -> Any:
 
     def _inputs_or_outputs_job(
         self, activity: Activity, is_inputs: bool, absolute: bool
-    ) -> Dict[str, Any]:
-        job: Dict[str, Any] = {}
+    ) -> dict[str, Any]:
+        job: dict[str, Any] = {}
 
         if is_inputs:
             records: Iterable[Union[Usage, Generation]] = activity.usage()
@@ -1228,19 +1226,19 @@ def rerun(self) -> int:
 
         return self._exec_cwlrunner(wf_arg, job_file)
 
-    def _load_cwl(self, wf_file: Union[str, Path]) -> Optional[Dict[str, Any]]:
+    def _load_cwl(self, wf_file: Union[str, Path]) -> Optional[dict[str, Any]]:
         _logger.debug("Loading CWL as JSON: %s", wf_file)
         with open(wf_file) as f:
             # FIXME: Load as yaml in case it is not JSON?
-            cwl = cast(Dict[str, Any], json.load(f))
+            cwl = cast(dict[str, Any], json.load(f))
         ver = cwl["cwlVersion"]
         _logger.debug("Loaded CWL version: %s", ver)
         if not ver.startswith("v1."):
             _logger.fatal("Unsupported cwlVersion %s in %s", ver, wf_file)
             return None
         return cwl
 
-    def _find_step_run(self, cwl: Dict[str, Any], step_id: str) -> Any:
+    def _find_step_run(self, cwl: dict[str, Any], step_id: str) -> Any:
         step = find_dict_with_item(cwl, step_id)
         if not step:
             _logger.error("Could not find step for ")
@@ -1288,13 +1286,13 @@ def _find_primary_job(self) -> Path:
         p = self.ro.resolve_path(str(path))
         return p
 
-    def _recreate_job(self, activity: Activity, absolute: bool) -> Dict[str, Any]:
+    def _recreate_job(self, activity: Activity, absolute: bool) -> dict[str, Any]:
         # TODO: Actually do it
         job = self._inputs_or_outputs_job(activity, is_inputs=True, absolute=absolute)
         _logger.debug("Recreated job: %s", job)
         return job
 
-    def _temporary_job(self, job: Dict[str, Any]) -> str:
+    def _temporary_job(self, job: dict[str, Any]) -> str:
         with tempfile.NamedTemporaryFile(
             mode="w", prefix="rerun-", suffix=".json", delete=False, encoding="UTF-8"
         ) as f:
@@ -1509,7 +1507,8 @@ def run(self) -> int:
         return Status.OK
 
 
-def main(args: Optional[List[str]] = None) -> int:
+def main(args: Optional[list[str]] = None) -> int:
+    """Run the cwlprov-py tool."""
     with Tool(args) as tool:
         try:
             return tool.main()

diff --git a/cwlprov/utils.py b/cwlprov/utils.py
@@ -23,8 +23,9 @@
 )
 
 import datetime
+from collections.abc import Iterable, Sequence
 from functools import partial
-from typing import Any, Iterable, Optional, Sequence, Set, Tuple, TypeVar, Union
+from typing import Any, Optional, Set, Tuple, TypeVar, Union
 
 prov_type = Union[type, tuple[type]]
 
@@ -36,7 +37,7 @@ def first(iterable: Union[Iterable[_T], Sequence[_T]]) -> Optional[_T]:
     return next(iter(iterable), None)
 
 
-def many(s: Set[Any]) -> str:
+def many(s: set[Any]) -> str:
     """Convert a set of strings into a comma separated string."""
     return ", ".join(map(str, s))