Skip to content

Commit

Permalink
Merge branch 'develop_carsten' into develop
Browse files Browse the repository at this point in the history
  • Loading branch information
cknoll committed Nov 11, 2023
2 parents 4f85c99 + 29ff31c commit 318ee8d
Show file tree
Hide file tree
Showing 14 changed files with 1,081 additions and 42 deletions.
9 changes: 0 additions & 9 deletions .github/workflows/python-app.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,15 +38,6 @@ jobs:
run: |
python -m pip install --upgrade pip
- name: get ocse as test data
run: |
mkdir -p erk-data-for-unittests
cd erk-data-for-unittests
echo "PYERK_TEST_DATA_PARENT_PATH=$(pwd)" >> "$GITHUB_ENV"
git clone https://github.com/ackrep-org/ocse.git
cd ocse
git checkout ut__pyerk__main
- name: debug env
run: |
echo "$PYERK_TEST_DATA_PARENT_PATH"
Expand Down
12 changes: 12 additions & 0 deletions docs/source/devdoc/overview.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,15 @@
(sec_devdoc_overview)=
# pyerk Developer Documentation Overview



## Test Data


The directory `tests/test_data/ocse_subset` contains an autogenerated subset of the OCSE (ontology of control systems engineering) which is used as real world testing data.

Originally the "real" OCSE data was used for testing. This had the advantage of avoiding double maintenance effort (OCSE and testing data) but led to increased runtimes of the test suite with the growing number of entities and statements in the OCSE. This is because the test data is loaded and unloaded multiple times to keep the tests (mostly) independent.

To keep the test data small but avoid additional maintenance effort, we now automatically extract the relevant entities and statements from the real OCSE.

`pyerk -utd <path/to/ocse/dir>`
16 changes: 16 additions & 0 deletions src/pyerk/auxiliary.py
Original file line number Diff line number Diff line change
Expand Up @@ -440,3 +440,19 @@ def get_erk_root_dir() -> str:
dir_of_this_file = os.path.dirname(os.path.abspath(sys.modules.get(__name__).__file__))
erk_root = os.path.abspath(os.path.join(dir_of_this_file, "..", "..", ".."))
return erk_root


def get_erk_path(dirname=None):
    """
    Return the absolute path which is associated with a symbolic directory name.

    :param dirname: `None` (-> result of `get_erk_root_dir()`) or the string
                    `"pyerk-core-test_data"` (-> `<pyerk_root>/tests/test_data`)
    :return: the path as string
    :raises ValueError: for every other value of `dirname`
    """

    if dirname is None:
        return get_erk_root_dir()

    this_module = sys.modules.get(__name__)
    here = os.path.dirname(os.path.abspath(this_module.__file__))

    # this assumes pyerk is installed with `pip install -e .` from the repo
    # (two levels above the directory of this file is taken as the pyerk root)
    pyerk_root = os.path.abspath(os.path.join(here, "..", ".."))

    if dirname != "pyerk-core-test_data":
        msg = f"unexpected dirname: {dirname}"
        raise ValueError(msg)

    return os.path.join(pyerk_root, "tests", "test_data")
7 changes: 6 additions & 1 deletion src/pyerk/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -2388,7 +2388,7 @@ def replace_and_unlink_entity(old_entity: Entity, new_entity: Entity):
return res


def register_mod(uri: str, keymanager: KeyManager, check_uri=True):
def register_mod(uri: str, keymanager: KeyManager, check_uri=True, prefix=None):
frame = get_caller_frame(upcount=1)
path = os.path.abspath(frame.f_globals["__file__"])
if check_uri:
Expand All @@ -2405,6 +2405,11 @@ def register_mod(uri: str, keymanager: KeyManager, check_uri=True):
# all modules should have their own key manager
ds.uri_keymanager_dict[uri] = keymanager

# currently this is only used from within unittests as they create test data on the fly and
# not use erkloader for every tiny item
if prefix:
ds.uri_prefix_mapping.add_pair(key_a=uri, key_b=prefix)


def start_mod(uri):
"""
Expand Down
139 changes: 135 additions & 4 deletions src/pyerk/script.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
import argparse
from pathlib import Path
import re
from typing import Tuple
import ast
import inspect

try:
# this will be part of standard library for python >= 3.11
Expand Down Expand Up @@ -137,6 +140,13 @@ def create_parser():
metavar="path_to_mod"
)

parser.add_argument(
"-utd",
"--update-test-data",
help="create a subset of the erkpackage (e.g. OCSE) and store it in the `test_data` dir of pyerk-core",
metavar="path_to_erk_package"
)

parser.add_argument("--dbg", help="start debug routine", default=None, action="store_true")

parser.add_argument(
Expand Down Expand Up @@ -222,21 +232,23 @@ def main():
pyerkdjango.core.start_django_shell()
elif args.insert_keys_for_placeholders:
insert_keys_for_placeholders(args.insert_keys_for_placeholders)
elif args.update_test_data:
update_test_data(args.update_test_data)
else:
print("nothing to do, see option `--help` for more info")


def process_package(pkg_path: str) -> Tuple[erkloader.ModuleType, str]:
    """
    Load the main module of an erk package.

    :param pkg_path: path to an `erkpackage.toml` file or to the directory which contains it
    :return: 2-tuple: (loaded main module, value of `main_module_prefix` from the config file)
    :raises KeyError: if `main_module` or `main_module_prefix` is missing in the config file
    """
    if os.path.isdir(pkg_path):
        pkg_path = os.path.join(pkg_path, "erkpackage.toml")

    # tomllib requires the file to be opened in binary mode
    with open(pkg_path, "rb") as fp:
        erk_conf_dict = tomllib.load(fp)
    main_rel_path = erk_conf_dict["main_module"]
    main_module_prefix = erk_conf_dict["main_module_prefix"]

    # `main_module` is interpreted relative to the directory of the config file
    main_mod_path = Path(pkg_path).parent.joinpath(main_rel_path).as_posix()

    mod = erkloader.load_mod_from_path(modpath=main_mod_path, prefix=main_module_prefix)
    return mod, main_module_prefix


Expand Down Expand Up @@ -397,6 +409,125 @@ def replace_dummy_enties_by_label(modpath):
fp.write(txt)


def update_test_data(pkg_path):
    """
    Render all `*__template.py` files in `<test_data>/ocse_subset/templates` and write the
    results (file name without the `__template` part) to `<test_data>/ocse_subset`.

    Background: see devdocs

    :param pkg_path: path of the erk package (passed on to `process_package`)
    """
    import glob

    # The return value is not needed here. The package is loaded because `process_template`
    # looks up the original modules in `core.ds.uri_mod_dict`
    # (presumably populated while loading -- TODO confirm).
    process_package(pkg_path)

    test_data_root = core.aux.get_erk_path("pyerk-core-test_data")
    target_dir = os.path.join(test_data_root, "ocse_subset")
    template_dir = os.path.join(target_dir, "templates")

    # sorted: process (and report) the templates in a reproducible order
    template_files = sorted(glob.glob(os.path.join(template_dir, "*__template.py")))
    for template_path in template_files:
        rendered_template_txt = process_template(template_path)
        fname = os.path.split(template_path)[-1].replace("__template", "")
        target_path = os.path.join(target_dir, fname)

        # explicit encoding: the result is python source, which is utf-8 by default
        with open(target_path, "w", encoding="utf-8") as fp:
            fp.write(rendered_template_txt)
        print(f"File written: {target_path}")


def process_template(template_path):
    """
    Render a test data template: replace its `insert_entities = [...]` statement by the source
    text of the listed definitions, taken from the original module.

    The original module is determined via the `__URI__` assignment of the template;
    `core.ds.uri_mod_dict` must already contain an entry for that URI.

    Every line between `insert_entities = [` and `]` is stripped of surrounding whitespace and
    commas and then handled as follows:

    - empty lines and lines starting with `#` are ignored
    - `raw__<text>`: `<text>` is inserted verbatim, followed by three newlines
    - `with__<expr>`: the whole line is used as lookup key (context managers)
    - `def__<name>`: `<name>` is used as lookup key
    - everything else is passed to `core.process_key_str(..., check=False)` and its
      `short_key` is used as lookup key

    :param template_path: path of the template file
    :return: text of the rendered template
    :raises KeyError: if no source text is available for a lookup key
    """

    templ_ast_cont = path_to_ast_container(template_path)

    # extract the uri-line
    uri_line = templ_ast_cont.line_data["__URI__"]
    tmp_locals = {}
    # NOTE(review): this executes code from the template file; only use trusted templates
    exec(uri_line, {}, tmp_locals)
    uri = tmp_locals["__URI__"]

    original_mod_path = inspect.getfile(core.ds.uri_mod_dict[uri])

    mod_ast_cont = path_to_ast_container(original_mod_path)

    insert_entities_src = templ_ast_cont.line_data["insert_entities"]
    insert_key_lines = insert_entities_src.strip().split("\n")
    assert insert_key_lines[0].strip() == "insert_entities = [", "unexpected start of `insert_entities`"
    assert insert_key_lines[-1].strip() == "]", "unexpected end of `insert_entities`"

    # drop the opening and the closing line of the list literal
    insert_key_lines = insert_key_lines[1:-1]

    lines_to_insert = []

    for line in insert_key_lines:
        line = line.strip().strip(",")
        if not line:
            continue
        elif line.startswith("#"):
            continue
        elif line.startswith("raw__"):
            # handle raw lines
            lines_to_insert.append(line[len("raw__"):])
            lines_to_insert.append("\n"*3)
            continue
        elif line.startswith("with__"):
            # handle context managers (`path_to_ast_container` stores them including the prefix)
            short_key = line
        elif line.startswith("def__"):
            short_key = line[len("def__"):]
        else:
            # assume pyerk entity
            short_key = core.process_key_str(line, check=False).short_key

        # `.get` instead of `[...]`: a missing key must reach the check below, which produces
        # a helpful message (plain indexing would raise a bare KeyError before that)
        original_content = mod_ast_cont.line_data.get(short_key)
        if not isinstance(original_content, str) or original_content == "":
            # shorten the path to `<parent dir>/<file name>` for the message
            short_template_path, fname = os.path.split(template_path)
            short_template_path = os.path.split(short_template_path)[-1]
            short_template_path = os.path.join(short_template_path, fname)
            msg = (
                f"could not find associated data for short_key {short_key} while processing "
                f"template line `{line}` in template {short_template_path}."
            )
            raise KeyError(msg)
        lines_to_insert.append(original_content)
        lines_to_insert.append("\n")

    new_insert_txt = "".join(lines_to_insert)

    rendered_template = templ_ast_cont.txt.replace(insert_entities_src, new_insert_txt)
    return rendered_template


def path_to_ast_container(mod_path: str) -> core.aux.Container:
    """
    Parse a python file and index the source text of its top-level statements by name.

    The returned container has the attributes `ast` (parsed module), `lines` (list of source
    lines), `txt` (complete source text) and `line_data`: a dict which maps names to source text.
    The following top-level statements are indexed:

    - assignments to a plain name: key is the name of the first target
    - function and class definitions: key is their name
    - `with` statements: key is `with__<source of first context expression>`,
      e.g. `with__I9907.scope("setting")`

    All other top-level statements are ignored. If a name occurs more than once, the last
    occurrence wins.

    :param mod_path: path of the python file
    :return: container as described above
    """

    # explicit encoding: python source is utf-8 by default, independent of the platform locale
    with open(mod_path, encoding="utf-8") as fp:
        lines = fp.readlines()

    txt = "".join(lines)
    c = core.aux.Container(ast=ast.parse(txt), lines=lines, line_data={}, txt=txt)

    for elt in c.ast.body:
        if isinstance(elt, ast.Assign):
            target = elt.targets[0]
            if not isinstance(target, ast.Name):
                # e.g. `a, b = ...` or `obj.attr = ...`: there is no single name to use as key
                continue
            name = target.id
        elif isinstance(elt, (ast.FunctionDef, ast.ClassDef)):
            # NOTE(review): for decorated definitions `elt.lineno` is the line of the
            # `def`/`class` keyword, so decorator lines are presumably not part of the
            # extracted text -- confirm that this is intended
            name = elt.name
        elif isinstance(elt, ast.With):
            # assume form like `with I9907.scope("setting") as cm:`
            # (taking the expression from the ast also works without an `as` clause)
            ctx_src = ast.get_source_segment(txt, elt.items[0].context_expr)
            if ctx_src is None:
                continue
            # create name string like `with__I9907.scope("setting")`
            name = f"with__{ctx_src}"
        else:
            continue

        assert isinstance(name, str)

        # subtract 1 because the line numbers are human-oriented (1-indexed)
        src_txt = "".join(lines[elt.lineno-1:elt.end_lineno])
        c.line_data[name] = src_txt

    return c


def get_lines_for_short_key(short_key: str) -> str:
    """
    Placeholder without implementation.

    The argument is ignored and `None` is returned (despite the `str` return annotation).
    """
    return None



def interactive_session(loaded_mod, prefix):
"""
Start an interactive IPython session where the (optinally) loaded mod is available under its prefix name.
Expand Down
7 changes: 3 additions & 4 deletions tests/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,9 @@
TEST_DATA_PARENT_PATH = os.getenv("PYERK_TEST_DATA_PARENT_PATH", default=pjoin(ERK_ROOT_DIR, "erk-data-for-unittests"))


TEST_DATA_REPO_PATH = pjoin(TEST_DATA_PARENT_PATH, "ocse")
TEST_DATA_PATH2 = pjoin(TEST_DATA_REPO_PATH, "control_theory1.py")
TEST_DATA_PATH_MA = pjoin(TEST_DATA_REPO_PATH, "math1.py")
TEST_DATA_PATH3 = pjoin(TEST_DATA_REPO_PATH, "agents1.py")
TEST_DATA_PATH2 = pjoin(TEST_DATA_DIR1, "ocse_subset", "control_theory1.py")
TEST_DATA_PATH_MA = pjoin(TEST_DATA_DIR1, "ocse_subset", "math1.py")
TEST_DATA_PATH3 = pjoin(TEST_DATA_DIR1, "ocse_subset", "agents1.py")
TEST_DATA_PATH_ZEBRA01 = pjoin(TEST_DATA_DIR1, "zebra01.py")
TEST_DATA_PATH_ZEBRA02 = pjoin(TEST_DATA_DIR1, "zebra02.py")
TEST_DATA_PATH_ZEBRA_BASE_DATA = pjoin(TEST_DATA_DIR1, "zebra_base_data.py")
Expand Down
33 changes: 9 additions & 24 deletions tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,12 @@
ERK_ROOT_DIR,
TEST_DATA_DIR1,
TEST_DATA_PARENT_PATH,
TEST_DATA_REPO_PATH,
TEST_DATA_PATH2,
TEST_DATA_PATH_MA,
TEST_DATA_PATH3,
TEST_DATA_PATH_ZEBRA_BASE_DATA,
TEST_DATA_PATH_ZEBRA02,
TEST_MOD_NAME,
TEST_DATA_REPO_COMMIT_SHA,
# TEST_ACKREP_DATA_FOR_UT_PATH,
TEST_BASE_URI,
WRITE_TMP_FILES,
Expand All @@ -40,19 +38,6 @@


class Test_00_Core(HouskeeperMixin, unittest.TestCase):
def test_a0__ensure_expected_test_data(self):
"""
Construct a list of all sha-strings which where commited in the current branch and assert that
the expected string is among them. This heuristics assumes that it is OK if the data-repo is newer than
expected. But the tests fails if it is older (or on a unexpeced branch).
"""

repo = git.Repo(TEST_DATA_REPO_PATH)
log_list = repo.git.log("--pretty=oneline").split("\n")
msg = f"Unexpected: could not find commit hash {TEST_DATA_REPO_COMMIT_SHA} in repo {TEST_DATA_REPO_PATH}"
sha_list = [line.split(" ")[0] for line in log_list]

self.assertIn(TEST_DATA_REPO_COMMIT_SHA, sha_list, msg=msg)

def test_a1__dependencyies(self):
# this tests checks some dependencies which are prone to cause problems (e.g. due to recent api-changes)
Expand Down Expand Up @@ -259,7 +244,7 @@ def test_a03_tear_down(self):
# (above noinspection is necessary because of the @-operator which is undecleared for strings)
def test_b00__core1_basics(self):
mod1 = p.erkloader.load_mod_from_path(TEST_DATA_PATH2, prefix="ct")
self.assertEqual(mod1.I3749.R1.value, "Cayley-Hamilton theorem")
self.assertEqual(mod1.ma.I3749.R1.value, "Cayley-Hamilton theorem")

def_eq_item = mod1.I6886.R6__has_defining_mathematical_relation
self.assertEqual(def_eq_item.R4__is_instance_of, p.I18["mathematical expression"])
Expand Down Expand Up @@ -373,16 +358,16 @@ def test_c05__evaluated_mapping2(self):
mod1 = p.erkloader.load_mod_from_path(TEST_DATA_PATH2, prefix="ct")

with p.uri_context(uri=TEST_BASE_URI):
h = p.instance_of(mod1.I9923["scalar field"])
f = p.instance_of(mod1.I9841["vector field"])
h = p.instance_of(mod1.ma.I9923["scalar field"])
f = p.instance_of(mod1.ma.I9841["vector field"])
x = p.instance_of(mod1.I1168["point in state space"])

Lderiv = mod1.I1347["Lie derivative of scalar field"]

# this creates a new item (and thus must be executed with a non-empty uri stack, i.e. within this context)
h2 = Lderiv(h, f, x)

self.assertEqual(h2.R4__is_instance_of, mod1.I9923["scalar field"])
self.assertEqual(h2.R4__is_instance_of, mod1.ma.I9923["scalar field"])

arg_tup = h2.R36__has_argument_tuple
self.assertEqual(arg_tup.R4__is_instance_of, p.I33["tuple"])
Expand All @@ -409,7 +394,7 @@ def test_c07__scope_vars(self):

# this tests for a bug with labels of scope vars
_ = p.erkloader.load_mod_from_path(TEST_DATA_PATH2, prefix="ct")
def_itm = p.ds.get_entity_by_key_str("ct__I9907__definition_of_square_matrix")
def_itm = p.ds.get_entity_by_key_str("ma__I9907__definition_of_square_matrix")
matrix_instance = def_itm.M
self.assertEqual(matrix_instance.R1.value, "M")

Expand Down Expand Up @@ -681,10 +666,10 @@ def test_c11__equation(self):
mod1 = p.erkloader.load_mod_from_path(TEST_DATA_PATH2, prefix="ct")

# get item via prefix and key
itm1: p.Item = p.ds.get_entity_by_key_str("ct__I3749__Cayley_Hamilton_theorem")
itm1: p.Item = p.ds.get_entity_by_key_str("ma__I3749__Cayley_Hamilton_theorem")

# get item via key and uri
itm2: p.Item = p.ds.get_entity_by_key_str("I3749__Cayley_Hamilton_theorem", mod_uri=mod1.__URI__)
itm2: p.Item = p.ds.get_entity_by_key_str("I3749__Cayley_Hamilton_theorem", mod_uri=mod1.ma.__URI__)

self.assertEqual(itm1, itm2)

Expand Down Expand Up @@ -829,7 +814,7 @@ def test_c14__visualization1(self):
mod1 = p.erkloader.load_mod_from_path(TEST_DATA_PATH2, TEST_MOD_NAME)

# do not use something like "Ia3699" here directly because this might change when mod1 changes
auto_item: p.Item = mod1.I3749["Cayley-Hamilton theorem"].A
auto_item: p.Item = mod1.ma.I3749["Cayley-Hamilton theorem"].A
res_graph: visualization.nx.DiGraph = visualization.create_nx_graph_from_entity(auto_item.uri)
self.assertGreater(res_graph.number_of_nodes(), 7)

Expand All @@ -840,7 +825,7 @@ def test_c15__visualization2(self):
res = visualization.visualize_entity(p.u("I21__mathematical_relation"), write_tmp_files=WRITE_TMP_FILES)

mod1 = p.erkloader.load_mod_from_path(TEST_DATA_PATH2, TEST_MOD_NAME)
auto_item: p.Item = mod1.I3749["Cayley-Hamilton theorem"].P
auto_item: p.Item = mod1.ma.I3749["Cayley-Hamilton theorem"].P
res = visualization.visualize_entity(auto_item.uri, write_tmp_files=WRITE_TMP_FILES)

s1 = '<a href="">R35</a>'
Expand Down
5 changes: 5 additions & 0 deletions tests/test_data/ocse_subset/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
This directory contains an autogenerated subset of the OCSE (ontology of control systems engineering) which is used as real world testing data.

Originally the "real" OCSE data was used for testing. This had the advantage of avoiding double maintenance effort (OCSE and testing data) but led to increased runtimes of the test suite with the growing number of entities and statements in the OCSE.

Solution: automatically extract the relevant entities and statements from the real ocse.

0 comments on commit 318ee8d

Please sign in to comment.