Merge branch 'develop_carsten' into develop

ackrep-org · Nov 11, 2023 · 318ee8d · 318ee8d
2 parents 4f85c99 + 29ff31c
commit 318ee8d
Show file tree

Hide file tree

Showing 14 changed files with 1,081 additions and 42 deletions.
diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml
@@ -38,15 +38,6 @@ jobs:
       run: |
         python -m pip install --upgrade pip
 
-    - name: get ocse as test data
-      run: |
-        mkdir -p erk-data-for-unittests
-        cd erk-data-for-unittests
-        echo "PYERK_TEST_DATA_PARENT_PATH=$(pwd)" >> "$GITHUB_ENV"
-        git clone https://github.com/ackrep-org/ocse.git
-        cd ocse
-        git checkout ut__pyerk__main
-
     - name: debug env
       run: |
         echo "$PYERK_TEST_DATA_PARENT_PATH"

diff --git a/docs/source/devdoc/overview.md b/docs/source/devdoc/overview.md
@@ -1,3 +1,15 @@
 (sec_devdoc_overview)=
 # pyerk Developer Documentation Overview
 
+
+
+## Test Data
+
+
+The directory `tests/test_data/ocse_subset` contains an autogenerated subset of the OCSE (ontology of control systems engineering) which is used as real world testing data.
+
+Originally the "real" OCSE data was used for testing. This had the advantage of avoiding double maintenance effort (for the OCSE and for the testing data) but led to increased runtimes of the test suite with the growing number of entities and statements in the OCSE. This is because the test data is loaded and unloaded multiple times to keep the tests (mostly) independent.
+
+To keep the test data small but avoid additional maintenance effort, we now automatically extract the relevant entities and statements from the real OCSE with the following command:
+
+`pyerk -utd <path/to/ocse/dir>`
diff --git a/src/pyerk/auxiliary.py b/src/pyerk/auxiliary.py
@@ -440,3 +440,19 @@ def get_erk_root_dir() -> str:
     dir_of_this_file = os.path.dirname(os.path.abspath(sys.modules.get(__name__).__file__))
     erk_root = os.path.abspath(os.path.join(dir_of_this_file, "..", "..", ".."))
     return erk_root
+
+
+def get_erk_path(dirname=None):
+    """
+    Return an absolute path which belongs to the erk/pyerk directory layout.
+
+    :param dirname:     selector for the requested directory:
+                        `None` (default): return the result of `get_erk_root_dir()`;
+                        "pyerk-core-test_data": return `<pyerk_root>/tests/test_data`
+
+    :raises ValueError: for every other value of `dirname`
+    """
+
+    if dirname is None:
+        return get_erk_root_dir()
+
+    this_file = os.path.abspath(sys.modules.get(__name__).__file__)
+
+    # this assumes pyerk is installed with `pip install -e .` from the repo
+    # (i.e. the repo root is two levels above the directory of this file)
+    pyerk_root = os.path.abspath(os.path.join(os.path.dirname(this_file), "..", ".."))
+
+    if dirname != "pyerk-core-test_data":
+        raise ValueError(f"unexpected dirname: {dirname}")
+
+    return os.path.join(pyerk_root, "tests", "test_data")
diff --git a/src/pyerk/core.py b/src/pyerk/core.py
@@ -2388,7 +2388,7 @@ def replace_and_unlink_entity(old_entity: Entity, new_entity: Entity):
     return res
 
 
-def register_mod(uri: str, keymanager: KeyManager, check_uri=True):
+def register_mod(uri: str, keymanager: KeyManager, check_uri=True, prefix=None):
     frame = get_caller_frame(upcount=1)
     path = os.path.abspath(frame.f_globals["__file__"])
     if check_uri:
@@ -2405,6 +2405,11 @@ def register_mod(uri: str, keymanager: KeyManager, check_uri=True):
     # all modules should have their own key manager
     ds.uri_keymanager_dict[uri] = keymanager
 
+    # currently this is only used from within unittests, as they create test data on the fly and
+    # do not use erkloader for every tiny item
+    if prefix:
+        ds.uri_prefix_mapping.add_pair(key_a=uri, key_b=prefix)
+
 
 def start_mod(uri):
     """

diff --git a/src/pyerk/script.py b/src/pyerk/script.py
@@ -5,6 +5,9 @@
 import argparse
 from pathlib import Path
 import re
+from typing import Tuple
+import ast
+import inspect
 
 try:
     # this will be part of standard library for python >= 3.11
@@ -137,6 +140,13 @@ def create_parser():
         metavar="path_to_mod"
     )
 
+    parser.add_argument(
+        "-utd",
+        "--update-test-data",
+        help="create a subset of the erkpackage (e.g. OCSE) and store it in the `test_data` dir of pyerk-core",
+        metavar="path_to_erk_package"
+    )
+
     parser.add_argument("--dbg", help="start debug routine", default=None, action="store_true")
 
     parser.add_argument(
@@ -222,21 +232,23 @@ def main():
         pyerkdjango.core.start_django_shell()
     elif args.insert_keys_for_placeholders:
         insert_keys_for_placeholders(args.insert_keys_for_placeholders)
+    elif args.update_test_data:
+        update_test_data(args.update_test_data)
     else:
         print("nothing to do, see option `--help` for more info")
 
 
-def process_package(pkg_path: str) -> erkloader.ModuleType:
+def process_package(pkg_path: str) -> Tuple[erkloader.ModuleType, str]:
     if os.path.isdir(pkg_path):
         pkg_path = os.path.join(pkg_path, "erkpackage.toml")
 
     with open(pkg_path, "rb") as fp:
         erk_conf_dict = tomllib.load(fp)
-    ocse_main_rel_path = erk_conf_dict["main_module"]
+    main_rel_path = erk_conf_dict["main_module"]
     main_module_prefix = erk_conf_dict["main_module_prefix"]
-    ocse_main_mod_path = Path(pkg_path).parent.joinpath(ocse_main_rel_path).as_posix()
+    main_mod_path = Path(pkg_path).parent.joinpath(main_rel_path).as_posix()
 
-    mod = erkloader.load_mod_from_path(modpath=ocse_main_mod_path, prefix=main_module_prefix)
+    mod = erkloader.load_mod_from_path(modpath=main_mod_path, prefix=main_module_prefix)
     return mod, main_module_prefix
 
 
@@ -397,6 +409,125 @@ def replace_dummy_enties_by_label(modpath):
         fp.write(txt)
 
 
+def update_test_data(pkg_path):
+    """
+    Regenerate the files in `<test_data>/ocse_subset` from the templates in
+    `<test_data>/ocse_subset/templates`.
+
+    Every file matching `*__template.py` is rendered with `process_template` and written to
+    the `ocse_subset` directory under the same name without the `__template` part.
+
+    Background: see devdocs
+
+    :param pkg_path:    path of the erk package (directory or `erkpackage.toml`) which serves
+                        as data source, passed on to `process_package`
+    """
+    import glob
+
+    # The return value is not needed here. The package is loaded because `process_template`
+    # looks up the original modules via `core.ds.uri_mod_dict` (presumably populated during
+    # loading -- TODO confirm).
+    process_package(pkg_path)
+
+    test_data_root = core.aux.get_erk_path("pyerk-core-test_data")
+    target_dir = os.path.join(test_data_root, "ocse_subset")
+    template_dir = os.path.join(target_dir, "templates")
+
+    # glob returns the matches in arbitrary order -> sort for reproducible processing/output
+    template_files = sorted(glob.glob(os.path.join(template_dir, "*__template.py")))
+    for template_path in template_files:
+        rendered_template_txt = process_template(template_path)
+        fname = os.path.basename(template_path).replace("__template", "")
+        target_path = os.path.join(target_dir, fname)
+        with open(target_path, "w") as fp:
+            fp.write(rendered_template_txt)
+        print(f"File written: {target_path}")
+
+
+def process_template(template_path):
+    """
+    Render a test data template: replace its `insert_entities = [...]` statement by the source
+    code of the listed entries, taken from the original module with the same `__URI__`.
+
+    Supported entries of the `insert_entities` list (one per line):
+
+    - empty lines and lines starting with `#`: ignored
+    - `raw__<text>`: `<text>` is inserted verbatim (followed by three line breaks)
+    - `with__<...>`: top-level `with` statement of the original module (the whole line is the key)
+    - `def__<name>`: top-level definition `<name>` of the original module
+    - everything else: interpreted as pyerk key string; its short key is looked up
+
+    :param template_path:   path of the `*__template.py` file
+
+    :return:                text of the rendered template (str)
+
+    :raises KeyError:       if the original module has no top-level statement for an entry
+    """
+
+    templ_ast_cont = path_to_ast_container(template_path)
+
+    # extract the uri-line
+    uri_line = templ_ast_cont.line_data["__URI__"]
+    tmp_locals = {}
+    # NOTE(review): `exec` runs source text taken from the template file. This is only
+    # acceptable as long as templates are trusted files (e.g. from the repo).
+    exec(uri_line, {}, tmp_locals)
+    uri = tmp_locals["__URI__"]
+
+    original_mod_path = inspect.getfile(core.ds.uri_mod_dict[uri])
+
+    mod_ast_cont = path_to_ast_container(original_mod_path)
+
+    insert_key_lines = templ_ast_cont.line_data["insert_entities"].strip().split("\n")
+    assert insert_key_lines[0].strip() == "insert_entities = [", "unexpected first line of `insert_entities`"
+    assert insert_key_lines[-1].strip() == "]", "unexpected last line of `insert_entities`"
+
+    # drop the opening and closing line of the list literal
+    insert_key_lines = insert_key_lines[1:-1]
+
+    lines_to_insert = []
+
+    for line in insert_key_lines:
+        line = line.strip().strip(",")
+        if not line or line.startswith("#"):
+            continue
+
+        if line.startswith("raw__"):
+            # handle raw lines
+            lines_to_insert.append(line[len("raw__"):])
+            lines_to_insert.append("\n"*3)
+            continue
+
+        if line.startswith("with__"):
+            # handle context managers (here the prefix is part of the key)
+            short_key = line
+        elif line.startswith("def__"):
+            short_key = line[len("def__"):]
+        else:
+            # assume pyerk entity
+            short_key = core.process_key_str(line, check=False).short_key
+
+        # use `.get` (not `[...]`): a missing key must result in the descriptive error below
+        # and not in a bare `KeyError` which does not mention the template
+        original_content = mod_ast_cont.line_data.get(short_key)
+        if not isinstance(original_content, str) or original_content == "":
+            # only report `<parent_dir>/<file_name>` of the template
+            parent_dir, fname = os.path.split(template_path)
+            short_template_path = os.path.join(os.path.split(parent_dir)[-1], fname)
+            msg = (
+                f"could not find associated data for short_key {short_key} while processing "
+                f"template line `{line}` in template {short_template_path}."
+            )
+            raise KeyError(msg)
+        lines_to_insert.append(original_content)
+        lines_to_insert.append("\n")
+
+    new_insert_txt = "".join(lines_to_insert)
+
+    rendered_template = templ_ast_cont.txt.replace(templ_ast_cont.line_data["insert_entities"], new_insert_txt)
+    return rendered_template
+
+
+def path_to_ast_container(mod_path: str) -> core.aux.Container:
+    """
+    Parse the python file `mod_path` and collect the source text of its top-level statements.
+
+    The returned container has the attributes `ast` (parsed module), `lines` (list of source
+    lines), `txt` (complete source) and `line_data` (dict: name -> source text). The keys of
+    `line_data` are:
+
+    - assignments: name of the first target (only if it is a plain name)
+    - functions and classes: their name (the source text includes decorators)
+    - `with ... as ...:` statements: `with__` + the text between `with ` and ` as `
+
+    All other top-level statements are ignored.
+
+    :param mod_path:    path of the python file
+    """
+
+    with open(mod_path) as fp:
+        lines = fp.readlines()
+
+    txt = "".join(lines)
+    c = core.aux.Container(ast=ast.parse(txt), lines=lines, line_data={}, txt=txt)
+
+    for elt in c.ast.body:
+        start_lineno = elt.lineno
+        if isinstance(elt, ast.Assign):
+            target = elt.targets[0]
+            if not isinstance(target, ast.Name):
+                # e.g. `a, b = ...` or `obj.attr = ...`: no simple name which could serve as key
+                continue
+            name = target.id
+        elif isinstance(elt, (ast.FunctionDef, ast.ClassDef)):
+            name = elt.name
+            # `lineno` refers to the `def`/`class` line -> include decorator lines (if any)
+            start_lineno = min([elt.lineno, *(dec.lineno for dec in elt.decorator_list)])
+        elif isinstance(elt, ast.With):
+            first_line = lines[elt.lineno-1]
+            # assume form like `with I9907.scope("setting") as cm:`
+            idx = first_line.find(" as ")
+            if idx == -1:
+                # `with` statement without ` as ` in its first line: no key can be constructed
+                continue
+            # create name string like `with__I9907.scope("setting")`
+            name = f"with__{first_line[len('with '):idx]}"
+        else:
+            continue
+
+        # subtract 1 because the line numbers are human-oriented (1-indexed)
+        src_txt = "".join(lines[start_lineno-1:elt.end_lineno])
+        c.line_data[name] = src_txt
+
+    return c
+
+
+def get_lines_for_short_key(short_key: str) -> str:
+    """
+    Placeholder without implementation: `short_key` is currently ignored and `None` is returned.
+    """
+    return None
+
+
+
 def interactive_session(loaded_mod, prefix):
     """
     Start an interactive IPython session where the (optinally) loaded mod is available under its prefix name.

diff --git a/tests/settings.py b/tests/settings.py
@@ -29,10 +29,9 @@
 TEST_DATA_PARENT_PATH = os.getenv("PYERK_TEST_DATA_PARENT_PATH", default=pjoin(ERK_ROOT_DIR, "erk-data-for-unittests"))
 
 
-TEST_DATA_REPO_PATH = pjoin(TEST_DATA_PARENT_PATH, "ocse")
-TEST_DATA_PATH2 = pjoin(TEST_DATA_REPO_PATH, "control_theory1.py")
-TEST_DATA_PATH_MA = pjoin(TEST_DATA_REPO_PATH, "math1.py")
-TEST_DATA_PATH3 = pjoin(TEST_DATA_REPO_PATH, "agents1.py")
+TEST_DATA_PATH2 = pjoin(TEST_DATA_DIR1, "ocse_subset", "control_theory1.py")
+TEST_DATA_PATH_MA = pjoin(TEST_DATA_DIR1, "ocse_subset", "math1.py")
+TEST_DATA_PATH3 = pjoin(TEST_DATA_DIR1, "ocse_subset", "agents1.py")
 TEST_DATA_PATH_ZEBRA01 = pjoin(TEST_DATA_DIR1, "zebra01.py")
 TEST_DATA_PATH_ZEBRA02 = pjoin(TEST_DATA_DIR1, "zebra02.py")
 TEST_DATA_PATH_ZEBRA_BASE_DATA = pjoin(TEST_DATA_DIR1, "zebra_base_data.py")

diff --git a/tests/test_core.py b/tests/test_core.py
@@ -21,14 +21,12 @@
     ERK_ROOT_DIR,
     TEST_DATA_DIR1,
     TEST_DATA_PARENT_PATH,
-    TEST_DATA_REPO_PATH,
     TEST_DATA_PATH2,
     TEST_DATA_PATH_MA,
     TEST_DATA_PATH3,
     TEST_DATA_PATH_ZEBRA_BASE_DATA,
     TEST_DATA_PATH_ZEBRA02,
     TEST_MOD_NAME,
-    TEST_DATA_REPO_COMMIT_SHA,
     # TEST_ACKREP_DATA_FOR_UT_PATH,
     TEST_BASE_URI,
     WRITE_TMP_FILES,
@@ -40,19 +38,6 @@
 
 
 class Test_00_Core(HouskeeperMixin, unittest.TestCase):
-    def test_a0__ensure_expected_test_data(self):
-        """
-        Construct a list of all sha-strings which where commited in the current branch and assert that
-        the expected string is among them. This heuristics assumes that it is OK if the data-repo is newer than
-        expected. But the tests fails if it is older (or on a unexpeced branch).
-        """
-
-        repo = git.Repo(TEST_DATA_REPO_PATH)
-        log_list = repo.git.log("--pretty=oneline").split("\n")
-        msg = f"Unexpected: could not find commit hash {TEST_DATA_REPO_COMMIT_SHA} in repo {TEST_DATA_REPO_PATH}"
-        sha_list = [line.split(" ")[0] for line in log_list]
-
-        self.assertIn(TEST_DATA_REPO_COMMIT_SHA, sha_list, msg=msg)
 
     def test_a1__dependencyies(self):
         # this tests checks some dependencies which are prone to cause problems (e.g. due to recent api-changes)
@@ -259,7 +244,7 @@ def test_a03_tear_down(self):
     # (above noinspection is necessary because of the @-operator which is undecleared for strings)
     def test_b00__core1_basics(self):
         mod1 = p.erkloader.load_mod_from_path(TEST_DATA_PATH2, prefix="ct")
-        self.assertEqual(mod1.I3749.R1.value, "Cayley-Hamilton theorem")
+        self.assertEqual(mod1.ma.I3749.R1.value, "Cayley-Hamilton theorem")
 
         def_eq_item = mod1.I6886.R6__has_defining_mathematical_relation
         self.assertEqual(def_eq_item.R4__is_instance_of, p.I18["mathematical expression"])
@@ -373,16 +358,16 @@ def test_c05__evaluated_mapping2(self):
         mod1 = p.erkloader.load_mod_from_path(TEST_DATA_PATH2, prefix="ct")
 
         with p.uri_context(uri=TEST_BASE_URI):
-            h = p.instance_of(mod1.I9923["scalar field"])
-            f = p.instance_of(mod1.I9841["vector field"])
+            h = p.instance_of(mod1.ma.I9923["scalar field"])
+            f = p.instance_of(mod1.ma.I9841["vector field"])
             x = p.instance_of(mod1.I1168["point in state space"])
 
             Lderiv = mod1.I1347["Lie derivative of scalar field"]
 
             # this creates a new item (and thus must be executed with a non-empty uri stack, i.e. within this context)
             h2 = Lderiv(h, f, x)
 
-        self.assertEqual(h2.R4__is_instance_of, mod1.I9923["scalar field"])
+        self.assertEqual(h2.R4__is_instance_of, mod1.ma.I9923["scalar field"])
 
         arg_tup = h2.R36__has_argument_tuple
         self.assertEqual(arg_tup.R4__is_instance_of, p.I33["tuple"])
@@ -409,7 +394,7 @@ def test_c07__scope_vars(self):
 
         # this tests for a bug with labels of scope vars
         _ = p.erkloader.load_mod_from_path(TEST_DATA_PATH2, prefix="ct")
-        def_itm = p.ds.get_entity_by_key_str("ct__I9907__definition_of_square_matrix")
+        def_itm = p.ds.get_entity_by_key_str("ma__I9907__definition_of_square_matrix")
         matrix_instance = def_itm.M
         self.assertEqual(matrix_instance.R1.value, "M")
 
@@ -681,10 +666,10 @@ def test_c11__equation(self):
         mod1 = p.erkloader.load_mod_from_path(TEST_DATA_PATH2, prefix="ct")
 
         # get item via prefix and key
-        itm1: p.Item = p.ds.get_entity_by_key_str("ct__I3749__Cayley_Hamilton_theorem")
+        itm1: p.Item = p.ds.get_entity_by_key_str("ma__I3749__Cayley_Hamilton_theorem")
 
         # get item via key and uri
-        itm2: p.Item = p.ds.get_entity_by_key_str("I3749__Cayley_Hamilton_theorem", mod_uri=mod1.__URI__)
+        itm2: p.Item = p.ds.get_entity_by_key_str("I3749__Cayley_Hamilton_theorem", mod_uri=mod1.ma.__URI__)
 
         self.assertEqual(itm1, itm2)
 
@@ -829,7 +814,7 @@ def test_c14__visualization1(self):
         mod1 = p.erkloader.load_mod_from_path(TEST_DATA_PATH2, TEST_MOD_NAME)
 
         # do not use something like "Ia3699" here directly because this might change when mod1 changes
-        auto_item: p.Item = mod1.I3749["Cayley-Hamilton theorem"].A
+        auto_item: p.Item = mod1.ma.I3749["Cayley-Hamilton theorem"].A
         res_graph: visualization.nx.DiGraph = visualization.create_nx_graph_from_entity(auto_item.uri)
         self.assertGreater(res_graph.number_of_nodes(), 7)
 
@@ -840,7 +825,7 @@ def test_c15__visualization2(self):
         res = visualization.visualize_entity(p.u("I21__mathematical_relation"), write_tmp_files=WRITE_TMP_FILES)
 
         mod1 = p.erkloader.load_mod_from_path(TEST_DATA_PATH2, TEST_MOD_NAME)
-        auto_item: p.Item = mod1.I3749["Cayley-Hamilton theorem"].P
+        auto_item: p.Item = mod1.ma.I3749["Cayley-Hamilton theorem"].P
         res = visualization.visualize_entity(auto_item.uri, write_tmp_files=WRITE_TMP_FILES)
 
         s1 = '<a href="">R35</a>'

diff --git a/tests/test_data/ocse_subset/README.md b/tests/test_data/ocse_subset/README.md
@@ -0,0 +1,5 @@
+This directory contains an autogenerated subset of the OCSE (ontology of control systems engineering) which is used as real world testing data.
+
+Originally the "real" OCSE data was used for testing. This had the advantage of avoiding double maintenance effort (for the OCSE and for the testing data) but led to increased runtimes of the test suite with the growing number of entities and statements in the OCSE.
+
+Solution: automatically extract the relevant entities and statements from the real OCSE.