dasch-swiss · jnussbaum · Oct 24, 2023 · Oct 24, 2023 · Oct 24, 2023 · Oct 24, 2023
diff --git a/.github/workflows/check-pr-title.yml b/.github/workflows/check-pr-title.yml
@@ -15,7 +15,7 @@ jobs:
         with:
           regex: 
             "^[a-z]+(\\([0-9a-z\\-_, ]+\\))?!?: .+\\(DEV-\\d+(, DEV-\\d+)*\\)$|\
-            ^chore.*"
+            ^(chore|refactor|style)(\\([0-9a-z\\-_, ]+\\))?!?: .+"
             # see here on how to cope with linebreaks in YAML: https://stackoverflow.com/a/21699210/14414188
           allowed_prefixes: "fix,refactor,feat,docs,chore,style,test" 
           disallowed_prefixes: "feature,hotfix" 

diff --git a/src/dsp_tools/analyse_xml_data/construct_and_analyze_graph.py b/src/dsp_tools/analyse_xml_data/construct_and_analyze_graph.py
@@ -1,3 +1,7 @@
+from __future__ import annotations
+
+from typing import Any
+
 import regex
 import rustworkx as rx
 from lxml import etree
@@ -84,12 +88,7 @@ def _extract_ids_from_one_text_value(text: etree._Element) -> set[str]:
 
 def make_graph(
     resptr_links: list[ResptrLink], xml_links: list[XMLLink], all_resource_ids: list[str]
-) -> tuple[  # type: ignore[type-arg]
-    rx.PyDiGraph,  # pylint: disable=no-member
-    dict[int, str],
-    list[tuple[int, int, ResptrLink | XMLLink]],
-    set[int],
-]:
+) -> tuple[rx.PyDiGraph[Any, Any], dict[int, str], list[tuple[int, int, ResptrLink | XMLLink]], set[int]]:
     """
     This function takes information about the resources (nodes) and links between them (edges).
     From that it constructs a rustworkx directed graph.
@@ -105,23 +104,22 @@ def make_graph(
     Returns:
         The rustworkx graph, a dictionary that maps the index number of each node to its original resource ID, the list of edges, and the set of node indices
     """
-    g: rx.PyDiGraph = rx.PyDiGraph()  # type: ignore[type-arg] # pylint: disable=no-member
+    g: rx.PyDiGraph[Any, Any] = rx.PyDiGraph()  # pylint: disable=no-member
     nodes = [(id_, None, None) for id_ in all_resource_ids]
-    node_indices = g.add_nodes_from(nodes)
-    node_indices = list(node_indices)  # type: ignore[assignment]
+    node_indices = list(g.add_nodes_from(nodes))
     node_id_lookup = dict(zip(all_resource_ids, node_indices))
     node_index_lookup = dict(zip(node_indices, all_resource_ids))
     edges: list[tuple[int, int, ResptrLink | XMLLink]] = [
-        (node_id_lookup[x.subject_id], node_id_lookup[x.object_id], x) for x in resptr_links
+        (node_id_lookup[x.source_id], node_id_lookup[x.target_id], x) for x in resptr_links
     ]
     for xml in xml_links:
-        edges.extend([(node_id_lookup[xml.subject_id], node_id_lookup[x], xml) for x in xml.object_link_ids])
+        edges.extend([(node_id_lookup[xml.source_id], node_id_lookup[x], xml) for x in xml.target_ids])
     g.add_edges_from(edges)
     return g, node_index_lookup, edges, set(node_indices)
 
 
 def _remove_leaf_nodes(
-    g: rx.PyDiGraph,  # type: ignore[type-arg] # pylint: disable=no-member
+    g: rx.PyDiGraph[Any, Any],
     node_index_lookup: dict[int, str],
     node_indices: set[int],
 ) -> tuple[list[str], set[int]]:
@@ -148,7 +146,7 @@ def _remove_leaf_nodes(
 
 
 def _find_cheapest_outgoing_links(
-    g: rx.PyDiGraph,  # type: ignore[type-arg] # pylint: disable=no-member
+    g: rx.PyDiGraph[Any, Any],
     cycle: list[tuple[int, int]],
     edge_list: list[tuple[int, int, XMLLink | ResptrLink]],
 ) -> list[tuple[int, int, XMLLink | ResptrLink]]:
@@ -179,7 +177,7 @@ def _find_cheapest_outgoing_links(
 
 
 def _remove_edges_to_stash(
-    g: rx.PyDiGraph,  # type: ignore[type-arg] # pylint: disable=no-member,
+    g: rx.PyDiGraph[Any, Any],
     edges_to_remove: list[tuple[int, int, XMLLink | ResptrLink]],
     edge_list: list[tuple[int, int, XMLLink | ResptrLink]],
     remaining_nodes: set[int],
@@ -246,39 +244,37 @@ def check(x: tuple[int, int, XMLLink | ResptrLink]) -> bool:
 
 
 def _add_stash_to_lookup_dict(
-    stash_dict: dict[str, list[str]], to_stash_links: list[XMLLink | ResptrLink]
+    stash_dict: dict[str, list[str]], links_to_stash: list[XMLLink | ResptrLink]
 ) -> dict[str, list[str]]:
-    stash_list = [stash_link.link_uuid for stash_link in to_stash_links]
+    stash_list = [stash_link.link_uuid for stash_link in links_to_stash]
     # all stashed links have the same source ID, so we can just take the first one
-    subj_id = to_stash_links[0].subject_id
-    if subj_id in stash_dict.keys():
+    subj_id = links_to_stash[0].source_id
+    if subj_id in stash_dict:
         stash_dict[subj_id].extend(stash_list)
     else:
         stash_dict[subj_id] = stash_list
     return stash_dict
 
 
 def generate_upload_order(
-    g: rx.PyDiGraph,  # type: ignore[type-arg] # pylint: disable=no-member
+    g: rx.PyDiGraph[Any, Any],
     node_index_lookup: dict[int, str],
     edge_list: list[tuple[int, int, XMLLink | ResptrLink]],
     node_indices: set[int],
 ) -> tuple[dict[str, list[str]], list[str], int]:
     """
-    This function takes a graph and a dictionary with the mapping between the graph indices and original ids.
-    It generates the order in which the resources should be uploaded to the DSP-API based on the dependencies.
+    Generate the order in which the resources should be uploaded to the DSP-API based on the dependencies.
 
     Args:
         g: graph
-        node_index_lookup: reference between graph indices and original id
-        edge_list: list of edges in the graph as tuple (source_node, target_node, Class Instance)
+        node_index_lookup: mapping between graph indices and original IDs
+        edge_list: list of edges in the graph as tuple (source node, target node, link info)
         node_indices: index numbers of the nodes still in the graph
 
     Returns:
-        Dictionary, which stores the information which resources have stashes
-        and which UUIDs of the elements should be stashed
-        A list that of resource IDs which gives the order in which the resources should be uploaded in the API
-        The number of links in the stash.
+        Dictionary that maps the resources that have stashes to the UUIDs of the stashed links
+        A list of resource IDs which gives the order in which the resources should be uploaded to DSP-API
+        The number of links in the stash
     """
     upload_order: list[str] = []
     stash_lookup: dict[str, list[str]] = {}

diff --git a/src/dsp_tools/analyse_xml_data/models.py b/src/dsp_tools/analyse_xml_data/models.py
@@ -10,13 +10,13 @@ class ResptrLink:
     This class represents a direct link (resptr) between a starting resource and a target resource.
 
     Args:
-        subject_id: resource ID that is in subject position of the triple
-        object_id: resource ID that is in object position of the triple
-        link_uuid: each link, which is represented in the graph gets a UUID
+        source_id: ID of the resource from which the link originates
+        target_id: ID of the resource that the link points to
+        link_uuid: identifier of this link
     """
 
-    subject_id: str
-    object_id: str
+    source_id: str
+    target_id: str
     link_uuid: str = field(default_factory=lambda: str(uuid.uuid4()))
 
     @property
@@ -28,20 +28,20 @@ def cost_links(self) -> float:
 @dataclass(frozen=True)
 class XMLLink:
     """
-    This class represents one or more links from a single starting resource to a set of target resources,
-    where all target resources are linked to from a single text value on the starting resource.
+    This class represents one or more links from a starting resource to a set of target resources,
+    where all target resources are linked to from a single text value of the starting resource.
 
     Args:
-        subject_id: resource ID that is in subject position of the triple
-        object_link_ids: a set that contains the resource IDs which were embedded in the <text> element
-        link_uuid: each link, which is represented in the graph gets a UUID
+        source_id: ID of the resource from which the link(s) originate
+        target_ids: IDs of the resources that are referenced in the text value
+        link_uuid: identifier of this link
     """
 
-    subject_id: str
-    object_link_ids: set[str]
+    source_id: str
+    target_ids: set[str]
     link_uuid: str = field(default_factory=lambda: str(uuid.uuid4()))
 
     @property
     def cost_links(self) -> float:
         """The cost of this outgoing link (1 / number of links in the XML text)"""
-        return 1 / len(self.object_link_ids)
+        return 1 / len(self.target_ids)
diff --git a/test/unittests/test_analyse_xml_data/test_construct_and_analyze_graph.py b/test/unittests/test_analyse_xml_data/test_construct_and_analyze_graph.py
@@ -41,12 +41,12 @@ def test_create_info_from_xml_for_graph_from_one_resource() -> None:
         </resource>"""
     )
     res_resptr_links, res_xml_links, subject_id = _create_info_from_xml_for_graph_from_one_resource(test_ele)
-    res_B_19 = [obj.object_id for obj in res_resptr_links]
+    res_B_19 = [obj.target_id for obj in res_resptr_links]
     assert "res_B_19" in res_B_19
     assert "res_C_19" in res_B_19
     assert "res_A_19" == subject_id
-    assert res_xml_links[0].subject_id == "res_A_19"
-    assert res_xml_links[0].object_link_ids == {"res_B_19", "res_C_19"}
+    assert res_xml_links[0].source_id == "res_A_19"
+    assert res_xml_links[0].target_ids == {"res_B_19", "res_C_19"}
 
 
 def test_create_info_from_xml_for_graph_from_one_resource_one() -> None:
@@ -67,9 +67,9 @@ def test_create_info_from_xml_for_graph_from_one_resource_one() -> None:
     )
     res_resptr, res_xml, subject_id = _create_info_from_xml_for_graph_from_one_resource(test_ele)
     assert subject_id == "res_A_11"
-    assert res_resptr[0].object_id == "res_B_11"
+    assert res_resptr[0].target_id == "res_B_11"
     assert isinstance(res_resptr[0], ResptrLink)
-    assert res_xml[0].object_link_ids == {"res_B_11"}
+    assert res_xml[0].target_ids == {"res_B_11"}
     assert isinstance(res_xml[0], XMLLink)
 
 
@@ -101,7 +101,7 @@ def test_text_only_create_info_from_xml_for_graph_from_one_resource() -> None:
     res_resptr, res_xml, subject_id = _create_info_from_xml_for_graph_from_one_resource(test_ele)
     assert subject_id == "res_C_18"
     assert not res_resptr
-    res_xml_ids = [x.object_link_ids for x in res_xml]
+    res_xml_ids = [x.target_ids for x in res_xml]
     assert unordered(res_xml_ids) == [{"res_A_18"}, {"res_B_18"}]
 
 
@@ -148,7 +148,7 @@ def test_extract_ids_from_text_prop_with_several_text_links() -> None:
         'class="salsah-link" href="IRI:res_B_18:IRI">res_B_18</a></text></text-prop>'
     )
     res = _create_text_link_objects("res_C_18", test_ele)
-    res_ids = [x.object_link_ids for x in res]
+    res_ids = [x.target_ids for x in res]
     assert unordered(res_ids) == [{"res_A_18"}, {"res_B_18"}]
 
 
@@ -162,7 +162,7 @@ def test_create_class_instance_resptr_link_one_link() -> None:
         """
     )
     res = _create_resptr_link_objects("res_A_15", test_ele)
-    assert res[0].object_id == "res_C_15"
+    assert res[0].target_id == "res_C_15"
 
 
 def test_create_class_instance_resptr_link_several() -> None:
@@ -178,9 +178,9 @@ def test_create_class_instance_resptr_link_several() -> None:
     )
     res = _create_resptr_link_objects("res_D_13", test_ele)
     assert all(isinstance(x, ResptrLink) for x in res)
-    assert res[0].object_id == "res_A_13"
-    assert res[1].object_id == "res_B_13"
-    assert res[2].object_id == "res_C_13"
+    assert res[0].target_id == "res_A_13"
+    assert res[1].target_id == "res_B_13"
+    assert res[2].target_id == "res_C_13"
 
 
 def test_create_info_from_xml_for_graph_check_UUID_in_root() -> None: